	// PTX ISA version this module is written against.
	.version 2.2
	// Target architecture the module is compiled for (sm_20).
	.target sm_20
	// compiled with ../../../External/3rdParty/NVIDIA/CUDA/win/bin/../open64/lib//be.exe
	// nvopencc 3.2 built on 2010-11-04

	// Forward declarations (prototype only; the bodies are not part of this
	// section). Each function returns a single .s32 value.
	//
	// IntegerMultiply(int, int): two .s32 parameters.
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)

	// Standard2DKernelX(): no parameters.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()

	// Standard2DKernelY(): no parameters.
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()

	// Forward declarations of the ushort4 <-> float4 conversion helpers.
	// Aggregates are passed and returned by value as aligned .b8 byte arrays.
	//
	// Half4ToFloat4(ushort4): takes 8 bytes (8-byte aligned), returns 16 bytes
	// (16-byte aligned).
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])

	// Float4ToHalf4(float4): takes 16 bytes (16-byte aligned), returns 8 bytes
	// (8-byte aligned).
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])

	// Forward declarations of the Mix3 / Rand / Rand2D helpers (prototype only).
	//
	// Mix3(unsigned&, unsigned&, unsigned&): the three references are passed as
	// 64-bit addresses (.u64); returns .u32.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)

	// Rand(unsigned): one .u32 parameter; returns .s32.
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)

	// Rand2D(unsigned, unsigned, unsigned): three .u32 parameters; returns .s32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)

	// Rand2D(unsigned): single-argument overload; returns .s32.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)

	// Forward declarations of the Read2D<T> / Write2D<T> template
	// instantiations for T = ushort4 (8 bytes) and T = float4 (16 bytes).
	// Pointers are 64-bit (.u64); the trailing three arguments are .s32.
	//
	// ushort4 Read2D<ushort4>(const ushort4*, int, int, int)
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)

	// float4 Read2D<float4>(const float4*, int, int, int)
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)

	// void Write2D<ushort4>(ushort4, ushort4*, int, int, int) -- no return value.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)

	// void Write2D<float4>(float4, float4*, int, int, int) -- no return value.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)

	// Forward declarations of the colour helpers that operate on PixelRGB /
	// float4 values (prototype only). PixelRGB and float4 are both passed and
	// returned by value as 16-byte, 16-byte-aligned .b8 arrays.
	//
	// UnpremultiplyPixel(PixelRGB) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])

	// ToLinearColor(float) -> .f32
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)

	// FromLinearColor(float) -> .f32
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)

	// PremultiplyLinearizePixel(PixelRGB) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])

	// UnpremultiplyUnlinearizePixel(PixelRGB) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])

	// PremultiplyLinearize(float4) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])

	// UnpremultiplyUnlinearize(float4) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])

	// GetLuma601(PixelRGB) -> .f32
	.visible .func (.param .f32 __cudaretf__Z10GetLuma6018PixelRGB) _Z10GetLuma6018PixelRGB (.param .align 16 .b8 __cudaparmf1__Z10GetLuma6018PixelRGB[16])

	// GetLuma709(PixelRGB) -> .f32
	.visible .func (.param .f32 __cudaretf__Z10GetLuma7098PixelRGB) _Z10GetLuma7098PixelRGB (.param .align 16 .b8 __cudaparmf1__Z10GetLuma7098PixelRGB[16])

	// LinearComposite(PixelRGB, PixelRGB, bool, float) -> 16-byte aggregate.
	// Note: the bool argument is declared as a .s32 parameter.
	.visible .func (.param .align 16 .b8 __cudaretf__Z15LinearComposite8PixelRGBS_bf[16]) _Z15LinearComposite8PixelRGBS_bf (.param .align 16 .b8 __cudaparmf1__Z15LinearComposite8PixelRGBS_bf[16], .param .align 16 .b8 __cudaparmf2__Z15LinearComposite8PixelRGBS_bf[16], .param .s32 __cudaparmf3__Z15LinearComposite8PixelRGBS_bf, .param .f32 __cudaparmf4__Z15LinearComposite8PixelRGBS_bf)

	// ClipColor(PixelRGB) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z9ClipColor8PixelRGB[16]) _Z9ClipColor8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z9ClipColor8PixelRGB[16])

	// Forward declarations of the Min/Max templates (float instantiations) and
	// the luma / saturation helpers on PixelRGB (prototype only).
	//
	// float Min<float>(float, float)
	.visible .func (.param .f32 __cudaretf__Z3MinIfET_S0_S0_) _Z3MinIfET_S0_S0_ (.param .f32 __cudaparmf1__Z3MinIfET_S0_S0_, .param .f32 __cudaparmf2__Z3MinIfET_S0_S0_)

	// float Max<float>(float, float)
	.visible .func (.param .f32 __cudaretf__Z3MaxIfET_S0_S0_) _Z3MaxIfET_S0_S0_ (.param .f32 __cudaparmf1__Z3MaxIfET_S0_S0_, .param .f32 __cudaparmf2__Z3MaxIfET_S0_S0_)

	// SetLuma(PixelRGB, float) -> 16-byte, 16-byte-aligned aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z7SetLuma8PixelRGBf[16]) _Z7SetLuma8PixelRGBf (.param .align 16 .b8 __cudaparmf1__Z7SetLuma8PixelRGBf[16], .param .f32 __cudaparmf2__Z7SetLuma8PixelRGBf)

	// GetSaturation(PixelRGB) -> .f32
	.visible .func (.param .f32 __cudaretf__Z13GetSaturation8PixelRGB) _Z13GetSaturation8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z13GetSaturation8PixelRGB[16])

	// SetSaturation(PixelRGB, float) -> 16-byte, 16-byte-aligned aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z13SetSaturation8PixelRGBf[16]) _Z13SetSaturation8PixelRGBf (.param .align 16 .b8 __cudaparmf1__Z13SetSaturation8PixelRGBf[16], .param .f32 __cudaparmf2__Z13SetSaturation8PixelRGBf)

	// Forward declarations of the BlendMode_ChannelFn_IR_BlendMode_<Mode>
	// family (prototype only). Every member has the same shape:
	//   float BlendMode_ChannelFn_IR_BlendMode_<Mode>(float, float)
	// i.e. two .f32 parameters and one .f32 return value. The only declaration
	// in this run with a different shape is Clamp<float>, noted below.
	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff) _Z39BlendMode_ChannelFn_IR_BlendMode_Normalff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff)

	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff) _Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff)

	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff) _Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff)

	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff) _Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff)

	// float Clamp<float>(float, float, float): three .f32 parameters.
	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)

	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff) _Z39BlendMode_ChannelFn_IR_BlendMode_Screenff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff)

	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff) _Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff)

	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff) _Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff)

	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff) _Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff)

	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff) _Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff)

	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff) _Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff)

	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff) _Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff)

	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff) _Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff)

	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff) _Z39BlendMode_ChannelFn_IR_BlendMode_Divideff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff)

	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff) _Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff)

	.visible .func (.param .f32 __cudaretf__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff) _Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff (.param .f32 __cudaparmf1__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff, .param .f32 __cudaparmf2__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff)

	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff) _Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff)

	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff) _Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff)

	.visible .func (.param .f32 __cudaretf__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff) _Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff (.param .f32 __cudaparmf1__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff, .param .f32 __cudaparmf2__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff)

	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff) _Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff)

	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff) _Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff)

	// Forward declarations of the BlendMode_PixelFn_IR_BlendMode_<Mode> family
	// (prototype only). Every member has the same shape:
	//   BlendMode_PixelFn_IR_BlendMode_<Mode>(PixelRGB, PixelRGB, float, int)
	// Parameters 1 and 2 are 16-byte, 16-byte-aligned aggregates, parameter 3
	// is .f32 and parameter 4 is .s32; the result is a 16-byte, 16-byte-aligned
	// aggregate.
	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16]) _Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi, .param .s32 __cudaparmf4__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16]) _Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16]) _Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi, .param .s32 __cudaparmf4__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16]) _Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi, .param .s32 __cudaparmf4__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi)

	// Forward declarations of the DarkerColor / LighterColor blend functions
	// (prototype only). The BlendMode_PixelRecFn_* symbols carry a
	// ..._IR_ColorCoding_Rec601 or ..._IR_ColorCoding_Rec709 suffix in their
	// names; the two BlendMode_PixelFn_* symbols at the end do not.
	// All six share one shape: (PixelRGB, PixelRGB, float, int) with a 16-byte,
	// 16-byte-aligned aggregate result.
	.visible .func (.param .align 16 .b8 __cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16]) _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi, .param .s32 __cudaparmf4__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16]) _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi, .param .s32 __cudaparmf4__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16]) _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi, .param .s32 __cudaparmf4__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16]) _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi, .param .s32 __cudaparmf4__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16]) _Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi, .param .s32 __cudaparmf4__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi)

	.visible .func (.param .align 16 .b8 __cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16]) _Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi, .param .s32 __cudaparmf4__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi)

	// Forward declarations of the float2 / float3 / float4 vector helpers
	// (prototype only). By-value aggregates appear as .b8 arrays: float2 is
	// 8 bytes (8-byte aligned), float3 is 12 bytes (4-byte aligned) and float4
	// is 16 bytes (16-byte aligned).
	//
	// distance(float, float, float2, float2) -> .f32
	.visible .func (.param .f32 __cudaretf__Z8distanceff6float2S_) _Z8distanceff6float2S_ (.param .f32 __cudaparmf1__Z8distanceff6float2S_, .param .f32 __cudaparmf2__Z8distanceff6float2S_, .param .align 8 .b8 __cudaparmf3__Z8distanceff6float2S_[8], .param .align 8 .b8 __cudaparmf4__Z8distanceff6float2S_[8])

	// sum(float4, float4) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z3sum6float4S_[16]) _Z3sum6float4S_ (.param .align 16 .b8 __cudaparmf1__Z3sum6float4S_[16], .param .align 16 .b8 __cudaparmf2__Z3sum6float4S_[16])

	// weight(float4, float) -> 16-byte aggregate
	.visible .func (.param .align 16 .b8 __cudaretf__Z6weight6float4f[16]) _Z6weight6float4f (.param .align 16 .b8 __cudaparmf1__Z6weight6float4f[16], .param .f32 __cudaparmf2__Z6weight6float4f)

	// normalize(float3) -> 12-byte aggregate
	.visible .func (.param .align 4 .b8 __cudaretf__Z9normalize6float3[12]) _Z9normalize6float3 (.param .align 4 .b8 __cudaparmf1__Z9normalize6float3[12])

	// vector(float3, float3) -> 12-byte aggregate
	.visible .func (.param .align 4 .b8 __cudaretf__Z6vector6float3S_[12]) _Z6vector6float3S_ (.param .align 4 .b8 __cudaparmf1__Z6vector6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z6vector6float3S_[12])

	// crossProduct(float3, float3) -> 12-byte aggregate
	.visible .func (.param .align 4 .b8 __cudaretf__Z12crossProduct6float3S_[12]) _Z12crossProduct6float3S_ (.param .align 4 .b8 __cudaparmf1__Z12crossProduct6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z12crossProduct6float3S_[12])

	// innerProduct(float3, float3) -> .f32
	.visible .func (.param .f32 __cudaretf__Z12innerProduct6float3S_) _Z12innerProduct6float3S_ (.param .align 4 .b8 __cudaparmf1__Z12innerProduct6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z12innerProduct6float3S_[12])

	// Forward declarations of two functions with no return value (prototype only).
	//
	// TransformDestToSrc(float*, float*, int, int, int, int, float x 8):
	// parameters 1-2 are 64-bit pointers (.u64), 3-6 are .s32, 7-14 are .f32.
	.visible .func _Z18TransformDestToSrcPfS_iiiiffffffff (.param .u64 __cudaparmf1__Z18TransformDestToSrcPfS_iiiiffffffff, .param .u64 __cudaparmf2__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf3__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf4__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf5__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf6__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf7__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf8__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf9__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf10__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf11__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf12__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf13__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf14__Z18TransformDestToSrcPfS_iiiiffffffff)

	// WriteCompositedPixel(PixelRGB, float4*, int, DevicePixelFormat, int, int,
	//                      int, int, float, IR_BlendMode, bool, bool):
	// parameter 1 is a 16-byte aggregate and parameter 2 a 64-bit pointer.
	// DevicePixelFormat (4) and IR_BlendMode (10) are declared .u32 --
	// presumably enums, TODO confirm. The two trailing bools (11, 12) are
	// declared .s32.
	.visible .func _Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb (.param .align 16 .b8 __cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb[16], .param .u64 __cudaparmf2__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf3__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .u32 __cudaparmf4__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf5__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf6__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf7__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf8__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .f32 __cudaparmf9__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .u32 __cudaparmf10__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf11__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf12__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb)

	.visible .func (.param .align 16 .b8 __cudaretf__Z13sampleTextureff[16]) _Z13sampleTextureff (.param .f32 __cudaparmf1__Z13sampleTextureff, .param .f32 __cudaparmf2__Z13sampleTextureff)

	.visible .func (.param .f32 __cudaretf__Z5GetW0f) _Z5GetW0f (.param .f32 __cudaparmf1__Z5GetW0f)

	.visible .func (.param .f32 __cudaretf__Z5GetW1f) _Z5GetW1f (.param .f32 __cudaparmf1__Z5GetW1f)

	.visible .func (.param .align 16 .b8 __cudaretf__Z12bicubicTex2Dff[16]) _Z12bicubicTex2Dff (.param .f32 __cudaparmf1__Z12bicubicTex2Dff, .param .f32 __cudaparmf2__Z12bicubicTex2Dff)

	.visible .func (.param .align 8 .b8 __cudaretf__Z9ComputeUV6float3S_S_S_ff[8]) _Z9ComputeUV6float3S_S_S_ff (.param .align 4 .b8 __cudaparmf1__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf2__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf3__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf4__Z9ComputeUV6float3S_S_S_ff[12], .param .f32 __cudaparmf5__Z9ComputeUV6float3S_S_S_ff, .param .f32 __cudaparmf6__Z9ComputeUV6float3S_S_S_ff)

	.visible .func (.param .f32 __cudaretf__Z24CubicInterpolationKernelff) _Z24CubicInterpolationKernelff (.param .f32 __cudaparmf1__Z24CubicInterpolationKernelff, .param .f32 __cudaparmf2__Z24CubicInterpolationKernelff)

	//-----------------------------------------------------------
	// Compiling C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f68_00000000-11_Motion.cpp3.i (C:/Users/dvaeng/AppData/Local/Temp/ccBI#.a12388)
	//-----------------------------------------------------------

	//-----------------------------------------------------------
	// Options:
	//-----------------------------------------------------------
	//  Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
	//  -O3	(Optimization level)
	//  -g0	(Debug level)
	//  -m2	(Report advisories)
	//-----------------------------------------------------------

	.file	1	"C:/Users/dvaeng/AppData/Local/Temp/tmpxft_00003f68_00000000-10_Motion.cudafe2.gpu"
	.file	2	"c:\Mulder64\shared\adobe\dvamediatypes/RenderQuality.h"
	.file	3	"c:\Mulder64\shared\adobe\MediaCore\ImageRenderer\API\Inc\ImageRenderer.h"
	.file	4	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/PixelFormat.h"
	.file	5	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/PixelRGB.h"
	.file	6	"c:/Mulder64/shared/adobe/MediaCore/GPUFoundation/Src/ImageProcessing/Motion.cu"
	.file	7	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\crtdefs.h"
	.file	8	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\crt/device_runtime.h"
	.file	9	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\host_defines.h"
	.file	10	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\builtin_types.h"
	.file	11	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_types.h"
	.file	12	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\driver_types.h"
	.file	13	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_types.h"
	.file	14	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_types.h"
	.file	15	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\vector_types.h"
	.file	16	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\builtin_types.h"
	.file	17	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\host_defines.h"
	.file	18	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\device_launch_parameters.h"
	.file	19	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\crt\storage_class.h"
	.file	20	"C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\include\time.h"
	.file	21	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/Utils.h"
	.file	22	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\Inc\ImageProcessing/Composite.h"
	.file	23	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/Numeric.h"
	.file	24	"c:\Mulder64\shared\adobe\MediaCore\External\3rdParty\NVIDIA\CUDA\win\include\common_functions.h"
	.file	25	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions.h"
	.file	26	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_constants.h"
	.file	27	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\device_functions.h"
	.file	28	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_11_atomic_functions.h"
	.file	29	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_12_atomic_functions.h"
	.file	30	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_13_double_functions.h"
	.file	31	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_atomic_functions.h"
	.file	32	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\sm_20_intrinsics.h"
	.file	33	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\surface_functions.h"
	.file	34	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\texture_fetch_functions.h"
	.file	35	"c:\mulder64\shared\adobe\mediacore\external\3rdparty\nvidia\cuda\win\include\math_functions_dbl_ptx3.h"
	.file	36	"c:\Mulder64\shared\adobe\MediaCore\GPUFoundation\API\Inc\GPUFoundation/KernelSupport/ColorSpaceConvert.h"


	// int IntegerMultiply(int, int)
	// Returns the low 32 bits of the product of the two 32-bit arguments.
	.visible .func (.param .s32 __cudaretf__Z15IntegerMultiplyii) _Z15IntegerMultiplyii (.param .s32 __cudaparmf1__Z15IntegerMultiplyii, .param .s32 __cudaparmf2__Z15IntegerMultiplyii)
	{
	.reg .u32 %lhs, %rhs, %product;
	.loc	21	60	0
$LDWbegin__Z15IntegerMultiplyii:
	// Fetch both operands straight into their working registers.
	ld.param.u32 	%lhs, [__cudaparmf1__Z15IntegerMultiplyii];
	ld.param.u32 	%rhs, [__cudaparmf2__Z15IntegerMultiplyii];
	.loc	21	64	0
	// mul.lo keeps only the low half of the 64-bit product (wraps on overflow).
	mul.lo.s32 	%product, %lhs, %rhs;
	st.param.s32 	[__cudaretf__Z15IntegerMultiplyii], %product;
	ret;
$LDWend__Z15IntegerMultiplyii:
	} // _Z15IntegerMultiplyii

	// int Standard2DKernelX()
	// Returns the calling thread's global X index:
	//   %ctaid.x * %ntid.x + %tid.x   (low 32 bits).
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelXv) _Z17Standard2DKernelXv ()
	{
	.reg .u32 %block_idx, %block_dim, %thread_idx, %global_x;
	.loc	21	73	0
$LDWbegin__Z17Standard2DKernelXv:
	.loc	21	74	0
	mov.u32 	%block_idx, %ctaid.x;
	mov.u32 	%block_dim, %ntid.x;
	mov.u32 	%thread_idx, %tid.x;
	// block_idx * block_dim + thread_idx, wrapping modulo 2^32.
	mad.lo.u32 	%global_x, %block_idx, %block_dim, %thread_idx;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelXv], %global_x;
	ret;
$LDWend__Z17Standard2DKernelXv:
	} // _Z17Standard2DKernelXv

	// int Standard2DKernelY()
	// Returns the calling thread's global Y index:
	//   %ctaid.y * %ntid.y + %tid.y   (low 32 bits).
	.visible .func (.param .s32 __cudaretf__Z17Standard2DKernelYv) _Z17Standard2DKernelYv ()
	{
	.reg .u32 %block_idx, %block_dim, %thread_idx, %global_y;
	.loc	21	77	0
$LDWbegin__Z17Standard2DKernelYv:
	.loc	21	78	0
	mov.u32 	%block_idx, %ctaid.y;
	mov.u32 	%block_dim, %ntid.y;
	mov.u32 	%thread_idx, %tid.y;
	// block_idx * block_dim + thread_idx, wrapping modulo 2^32.
	mad.lo.u32 	%global_y, %block_idx, %block_dim, %thread_idx;
	st.param.s32 	[__cudaretf__Z17Standard2DKernelYv], %global_y;
	ret;
$LDWend__Z17Standard2DKernelYv:
	} // _Z17Standard2DKernelYv

	// float4 Half4ToFloat4(ushort4)
	// Treats each of the four 16-bit input components as an IEEE half-precision
	// value and widens it to f32 (cvt.ftz: subnormals flushed to zero).
	// In : 8-byte param, components at byte offsets 0, 2, 4, 6.
	// Out: 16-byte return param, components at byte offsets 0, 4, 8, 12,
	//      in the same component order as the input.
	.visible .func (.param .align 16 .b8 __cudaretf__Z13Half4ToFloat47ushort4[16]) _Z13Half4ToFloat47ushort4 (.param .align 8 .b8 __cudaparmf1__Z13Half4ToFloat47ushort4[8])
	{
	.reg .u32 %r<14>;
	.reg .f32 %f<9>;
	.loc	21	86	0
$LDWbegin__Z13Half4ToFloat47ushort4:
	// Load the four 16-bit components into 32-bit registers:
	// %r2 = component 0, %r4 = component 1, %r6 = component 2, %r8 = component 3.
	ld.param.u16 	%r1, [__cudaparmf1__Z13Half4ToFloat47ushort4+0];
	mov.s32 	%r2, %r1;
	ld.param.u16 	%r3, [__cudaparmf1__Z13Half4ToFloat47ushort4+2];
	mov.s32 	%r4, %r3;
	ld.param.u16 	%r5, [__cudaparmf1__Z13Half4ToFloat47ushort4+4];
	mov.s32 	%r6, %r5;
	ld.param.u16 	%r7, [__cudaparmf1__Z13Half4ToFloat47ushort4+6];
	mov.s32 	%r8, %r7;
	.loc	21	87	0
	// Each scoped { ... } group declares a private %b1 temporary that carries
	// the half bit pattern into the f16 -> f32 conversion.
	// component 1 -> %f1
	cvt.u16.u32 	%r9, %r4;
	{ .reg .b32 %b1;
	mov.b32		%b1, %r9;
	cvt.ftz.f32.f16	%f1, %b1; }
	// component 2 -> %f2
	cvt.u16.u32 	%r10, %r6;
	{ .reg .b32 %b1;
	mov.b32		%b1, %r10;
	cvt.ftz.f32.f16	%f2, %b1; }
	// component 3 -> %f3
	cvt.u16.u32 	%r11, %r8;
	{ .reg .b32 %b1;
	mov.b32		%b1, %r11;
	cvt.ftz.f32.f16	%f3, %b1; }
	// component 0 -> %f4
	cvt.u16.u32 	%r12, %r2;
	{ .reg .b32 %b1;
	mov.b32		%b1, %r12;
	cvt.ftz.f32.f16	%f4, %b1; }
	// Write the widened components back in their original order.
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+0], %f4;
	mov.f32 	%f5, %f1;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+4], %f5;
	mov.f32 	%f6, %f2;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+8], %f6;
	mov.f32 	%f7, %f3;
	st.param.f32 	[__cudaretf__Z13Half4ToFloat47ushort4+12], %f7;
	ret;
$LDWend__Z13Half4ToFloat47ushort4:
	} // _Z13Half4ToFloat47ushort4

	// ushort4 Float4ToHalf4(float4)
	// Narrows each of the four f32 input components to half precision
	// (cvt.rn.ftz: round-to-nearest-even, subnormals flushed to zero) and
	// returns the four 16-bit patterns.
	// In : 16-byte param, components at byte offsets 0, 4, 8, 12.
	// Out: 8-byte return param, components at byte offsets 0, 2, 4, 6,
	//      in the same component order as the input.
	.visible .func (.param .align 8 .b8 __cudaretf__Z13Float4ToHalf46float4[8]) _Z13Float4ToHalf46float4 (.param .align 16 .b8 __cudaparmf1__Z13Float4ToHalf46float4[16])
	{
	.reg .u32 %r<13>;
	.reg .f32 %f<10>;
	.loc	21	95	0
$LDWbegin__Z13Float4ToHalf46float4:
	// %f2 = component 0, %f4 = component 1, %f6 = component 2, %f8 = component 3.
	ld.param.f32 	%f1, [__cudaparmf1__Z13Float4ToHalf46float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13Float4ToHalf46float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z13Float4ToHalf46float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z13Float4ToHalf46float4+12];
	mov.f32 	%f8, %f7;
	.loc	21	96	0
	// Each scoped { ... } group declares a private %b1 temporary that receives
	// the half bit pattern; cvt.u16.u32 then keeps only its low 16 bits.
	// component 1 -> %r2
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r1, %b1; }
	cvt.u16.u32 	%r2, %r1;
	// component 2 -> %r4
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r3, %b1; }
	cvt.u16.u32 	%r4, %r3;
	// component 3 -> %r6
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r5, %b1; }
	cvt.u16.u32 	%r6, %r5;
	// component 0 -> %r8
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r7, %b1; }
	cvt.u16.u32 	%r8, %r7;
	// Write the narrowed components back in their original order.
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+0], %r8;
	mov.s32 	%r9, %r2;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+2], %r9;
	mov.s32 	%r10, %r4;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+4], %r10;
	mov.s32 	%r11, %r6;
	st.param.u16 	[__cudaretf__Z13Float4ToHalf46float4+6], %r11;
	ret;
$LDWend__Z13Float4ToHalf46float4:
	} // _Z13Float4ToHalf46float4

	// unsigned Mix3(unsigned &a, unsigned &b, unsigned &c)
	// In-place scrambling of three 32-bit words passed by reference
	// (a = [%rd2], b = [%rd4], c = [%rd6]). Nine rounds of
	//   x -= y; x -= z; x ^= (z shifted)
	// rotate through a, b, c with shifts >>13, <<8, >>13, >>12, <<16, >>5,
	// >>3, <<10, >>15. All three words are updated in memory; the final value
	// of c is also returned.
	// NOTE(review): the shift sequence looks like Bob Jenkins' 96-bit mix -- confirm
	// against the source header before relying on that.
	// The loads and stores carry no state-space qualifier, i.e. they use generic
	// addressing. Every partial result is stored and then re-read from memory
	// rather than reused from a register (presumably because the references may
	// alias), so the statement order here is significant.
	.visible .func (.param .u32 __cudaretf__Z4Mix3RjS_S_) _Z4Mix3RjS_S_ (.param .u64 __cudaparmf1__Z4Mix3RjS_S_, .param .u64 __cudaparmf2__Z4Mix3RjS_S_, .param .u64 __cudaparmf3__Z4Mix3RjS_S_)
	{
	.reg .u32 %r<75>;
	.reg .u64 %rd<8>;
	.loc	21	138	0
$LDWbegin__Z4Mix3RjS_S_:
	// %rd2 = &a, %rd4 = &b, %rd6 = &c
	ld.param.u64 	%rd1, [__cudaparmf1__Z4Mix3RjS_S_];
	mov.s64 	%rd2, %rd1;
	ld.param.u64 	%rd3, [__cudaparmf2__Z4Mix3RjS_S_];
	mov.s64 	%rd4, %rd3;
	ld.param.u64 	%rd5, [__cudaparmf3__Z4Mix3RjS_S_];
	mov.s64 	%rd6, %rd5;
	.loc	21	139	0
	// a -= b; a -= c; a ^= c >> 13;
	ld.u32 	%r1, [%rd2+0];
	ld.u32 	%r2, [%rd4+0];
	sub.u32 	%r3, %r1, %r2;
	st.u32 	[%rd2+0], %r3;
	ld.u32 	%r4, [%rd6+0];
	sub.u32 	%r5, %r3, %r4;
	st.u32 	[%rd2+0], %r5;
	ld.u32 	%r6, [%rd6+0];
	shr.u32 	%r7, %r6, 13;
	xor.b32 	%r8, %r5, %r7;
	st.u32 	[%rd2+0], %r8;
	.loc	21	140	0
	// b -= c; b -= a; b ^= a << 8;
	ld.u32 	%r9, [%rd4+0];
	ld.u32 	%r10, [%rd6+0];
	sub.u32 	%r11, %r9, %r10;
	st.u32 	[%rd4+0], %r11;
	ld.u32 	%r12, [%rd2+0];
	sub.u32 	%r13, %r11, %r12;
	st.u32 	[%rd4+0], %r13;
	ld.u32 	%r14, [%rd2+0];
	shl.b32 	%r15, %r14, 8;
	xor.b32 	%r16, %r13, %r15;
	st.u32 	[%rd4+0], %r16;
	.loc	21	141	0
	// c -= a; c -= b; c ^= b >> 13;
	ld.u32 	%r17, [%rd6+0];
	ld.u32 	%r18, [%rd2+0];
	sub.u32 	%r19, %r17, %r18;
	st.u32 	[%rd6+0], %r19;
	ld.u32 	%r20, [%rd4+0];
	sub.u32 	%r21, %r19, %r20;
	st.u32 	[%rd6+0], %r21;
	ld.u32 	%r22, [%rd4+0];
	shr.u32 	%r23, %r22, 13;
	xor.b32 	%r24, %r21, %r23;
	st.u32 	[%rd6+0], %r24;
	.loc	21	142	0
	// a -= b; a -= c; a ^= c >> 12;
	ld.u32 	%r25, [%rd2+0];
	ld.u32 	%r26, [%rd4+0];
	sub.u32 	%r27, %r25, %r26;
	st.u32 	[%rd2+0], %r27;
	ld.u32 	%r28, [%rd6+0];
	sub.u32 	%r29, %r27, %r28;
	st.u32 	[%rd2+0], %r29;
	ld.u32 	%r30, [%rd6+0];
	shr.u32 	%r31, %r30, 12;
	xor.b32 	%r32, %r29, %r31;
	st.u32 	[%rd2+0], %r32;
	.loc	21	143	0
	// b -= c; b -= a; b ^= a << 16;
	ld.u32 	%r33, [%rd4+0];
	ld.u32 	%r34, [%rd6+0];
	sub.u32 	%r35, %r33, %r34;
	st.u32 	[%rd4+0], %r35;
	ld.u32 	%r36, [%rd2+0];
	sub.u32 	%r37, %r35, %r36;
	st.u32 	[%rd4+0], %r37;
	ld.u32 	%r38, [%rd2+0];
	shl.b32 	%r39, %r38, 16;
	xor.b32 	%r40, %r37, %r39;
	st.u32 	[%rd4+0], %r40;
	.loc	21	144	0
	// c -= a; c -= b; c ^= b >> 5;
	ld.u32 	%r41, [%rd6+0];
	ld.u32 	%r42, [%rd2+0];
	sub.u32 	%r43, %r41, %r42;
	st.u32 	[%rd6+0], %r43;
	ld.u32 	%r44, [%rd4+0];
	sub.u32 	%r45, %r43, %r44;
	st.u32 	[%rd6+0], %r45;
	ld.u32 	%r46, [%rd4+0];
	shr.u32 	%r47, %r46, 5;
	xor.b32 	%r48, %r45, %r47;
	st.u32 	[%rd6+0], %r48;
	.loc	21	145	0
	// a -= b; a -= c; a ^= c >> 3;
	ld.u32 	%r49, [%rd2+0];
	ld.u32 	%r50, [%rd4+0];
	sub.u32 	%r51, %r49, %r50;
	st.u32 	[%rd2+0], %r51;
	ld.u32 	%r52, [%rd6+0];
	sub.u32 	%r53, %r51, %r52;
	st.u32 	[%rd2+0], %r53;
	ld.u32 	%r54, [%rd6+0];
	shr.u32 	%r55, %r54, 3;
	xor.b32 	%r56, %r53, %r55;
	st.u32 	[%rd2+0], %r56;
	.loc	21	146	0
	// b -= c; b -= a; b ^= a << 10;
	ld.u32 	%r57, [%rd4+0];
	ld.u32 	%r58, [%rd6+0];
	sub.u32 	%r59, %r57, %r58;
	st.u32 	[%rd4+0], %r59;
	ld.u32 	%r60, [%rd2+0];
	sub.u32 	%r61, %r59, %r60;
	st.u32 	[%rd4+0], %r61;
	ld.u32 	%r62, [%rd2+0];
	shl.b32 	%r63, %r62, 10;
	xor.b32 	%r64, %r61, %r63;
	st.u32 	[%rd4+0], %r64;
	.loc	21	147	0
	// c -= a; c -= b; c ^= b >> 15;
	ld.u32 	%r65, [%rd6+0];
	ld.u32 	%r66, [%rd2+0];
	sub.u32 	%r67, %r65, %r66;
	st.u32 	[%rd6+0], %r67;
	ld.u32 	%r68, [%rd4+0];
	sub.u32 	%r69, %r67, %r68;
	st.u32 	[%rd6+0], %r69;
	ld.u32 	%r70, [%rd4+0];
	shr.u32 	%r71, %r70, 15;
	xor.b32 	%r72, %r69, %r71;
	st.u32 	[%rd6+0], %r72;
	.loc	21	148	0
	// Return the value just stored to c (taken from the register, not re-read).
	mov.s32 	%r73, %r72;
	st.param.u32 	[__cudaretf__Z4Mix3RjS_S_], %r73;
	ret;
$LDWend__Z4Mix3RjS_S_:
	} // _Z4Mix3RjS_S_

	// int Rand(unsigned seed)
	// Builds a 15-bit hash from two successive steps of the linear congruential
	// generator x' = x * 1103515245 + 12345 (mod 2^32):
	//   result = (((step1 >> 16) & 0xFF) << 7) ^ ((step2 >> 16) & 0xFF)
	// The second step is pre-folded into one multiply/subtract on the seed:
	//   step2 = seed * A^2 + C * (A + 1), where A^2 == -1029531031 and
	//   C * (A + 1) == -740551042 (both mod 2^32).
	.visible .func (.param .s32 __cudaretf__Z4Randj) _Z4Randj (.param .u32 __cudaparmf1__Z4Randj)
	{
	.reg .u32 %seed, %step1, %step2, %bits_hi, %bits_lo, %hash;
	.loc	21	152	0
$LDWbegin__Z4Randj:
	ld.param.u32 	%seed, [__cudaparmf1__Z4Randj];
	.loc	21	163	0
	// Low contribution: bits 16..23 of the second LCG state.
	mul.lo.u32 	%step2, %seed, -1029531031;
	sub.u32 	%step2, %step2, 740551042;
	shr.u32 	%bits_lo, %step2, 16;
	and.b32 	%bits_lo, %bits_lo, 255;
	// High contribution: bits 16..23 of the first LCG state, moved up 7 bits.
	mul.lo.u32 	%step1, %seed, 1103515245;
	add.u32 	%step1, %step1, 12345;
	shr.u32 	%bits_hi, %step1, 16;
	and.b32 	%bits_hi, %bits_hi, 255;
	shl.b32 	%bits_hi, %bits_hi, 7;
	xor.b32 	%hash, %bits_hi, %bits_lo;
	st.param.s32 	[__cudaretf__Z4Randj], %hash;
	ret;
$LDWend__Z4Randj:
	} // _Z4Randj

	// int Rand2D(unsigned a, unsigned b, unsigned c)
	// Scrambles the three arguments with nine subtract/subtract/xor-shift
	// rounds held entirely in registers, then hashes the final third word:
	//   (((m * 1103515245 + 12345) >> 16 & 0xFF) << 7)
	//     ^ ((m * -1029531031 - 740551042) >> 16 & 0xFF)
	// The .loc entries 139..147 show the mixing rounds were inlined from
	// another routine in file 21; the arguments themselves are by-value.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Djjj) _Z6Rand2Djjj (.param .u32 __cudaparmf1__Z6Rand2Djjj, .param .u32 __cudaparmf2__Z6Rand2Djjj, .param .u32 __cudaparmf3__Z6Rand2Djjj)
	{
	.reg .u32 %r<54>;
	.loc	21	169	0
$LDWbegin__Z6Rand2Djjj:
	// %r2 = a, %r4 = b, %r6 = c
	ld.param.u32 	%r1, [__cudaparmf1__Z6Rand2Djjj];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf2__Z6Rand2Djjj];
	mov.s32 	%r4, %r3;
	ld.param.u32 	%r5, [__cudaparmf3__Z6Rand2Djjj];
	mov.s32 	%r6, %r5;
	.loc	21	139	0
	// a = (a - b - c) ^ (c >> 13)            -> %r10
	sub.u32 	%r7, %r2, %r4;
	sub.u32 	%r8, %r7, %r6;
	shr.u32 	%r9, %r6, 13;
	xor.b32 	%r10, %r8, %r9;
	.loc	21	140	0
	// b = (b - c - a) ^ (a << 8)             -> %r14
	sub.u32 	%r11, %r4, %r6;
	sub.u32 	%r12, %r11, %r10;
	shl.b32 	%r13, %r10, 8;
	xor.b32 	%r14, %r12, %r13;
	.loc	21	141	0
	// c = (c - a - b) ^ (b >> 13)            -> %r18
	sub.u32 	%r15, %r6, %r10;
	sub.u32 	%r16, %r15, %r14;
	shr.u32 	%r17, %r14, 13;
	xor.b32 	%r18, %r16, %r17;
	.loc	21	142	0
	// a = (a - b - c) ^ (c >> 12)            -> %r22
	sub.u32 	%r19, %r10, %r14;
	sub.u32 	%r20, %r19, %r18;
	shr.u32 	%r21, %r18, 12;
	xor.b32 	%r22, %r20, %r21;
	.loc	21	143	0
	// b = (b - c - a) ^ (a << 16)            -> %r26
	sub.u32 	%r23, %r14, %r18;
	sub.u32 	%r24, %r23, %r22;
	shl.b32 	%r25, %r22, 16;
	xor.b32 	%r26, %r24, %r25;
	.loc	21	144	0
	// c = (c - a - b) ^ (b >> 5)             -> %r30
	sub.u32 	%r27, %r18, %r22;
	sub.u32 	%r28, %r27, %r26;
	shr.u32 	%r29, %r26, 5;
	xor.b32 	%r30, %r28, %r29;
	.loc	21	145	0
	// a = (a - b - c) ^ (c >> 3)             -> %r34
	sub.u32 	%r31, %r22, %r26;
	sub.u32 	%r32, %r31, %r30;
	shr.u32 	%r33, %r30, 3;
	xor.b32 	%r34, %r32, %r33;
	.loc	21	146	0
	// b = (b - c - a) ^ (a << 10)            -> %r38
	sub.u32 	%r35, %r26, %r30;
	sub.u32 	%r36, %r35, %r34;
	shl.b32 	%r37, %r34, 10;
	xor.b32 	%r38, %r36, %r37;
	.loc	21	147	0
	// c = (c - a - b) ^ (b >> 15)            -> %r42 (the mixed word m)
	sub.u32 	%r39, %r30, %r34;
	sub.u32 	%r40, %r39, %r38;
	shr.u32 	%r41, %r38, 15;
	xor.b32 	%r42, %r40, %r41;
	.loc	21	170	0
	// Final hash of m: byte 2 of (m*A + C) shifted left 7, xor byte 2 of the
	// second multiply/subtract form.
	mul.lo.u32 	%r43, %r42, 1103515245;
	add.u32 	%r44, %r43, 12345;
	shr.u32 	%r45, %r44, 16;
	and.b32 	%r46, %r45, 255;
	shl.b32 	%r47, %r46, 7;
	mul.lo.u32 	%r48, %r42, -1029531031;
	sub.u32 	%r49, %r48, 740551042;
	shr.u32 	%r50, %r49, 16;
	and.b32 	%r51, %r50, 255;
	xor.b32 	%r52, %r47, %r51;
	st.param.s32 	[__cudaretf__Z6Rand2Djjj], %r52;
	ret;
$LDWend__Z6Rand2Djjj:
	} // _Z6Rand2Djjj

	// int Rand2D(unsigned seed)
	// Per-thread hash: mixes the seed with the calling thread's global
	// coordinates
	//   x = %ctaid.x * %ntid.x + %tid.x,  y = %ctaid.y * %ntid.y + %tid.y
	// using nine subtract/subtract/xor-shift rounds with (a, b, c) = (seed, x, y),
	// then applies the same two-multiplier byte hash as the final stage of the
	// three-argument overload. The compiler has interleaved the first rounds, so
	// the instruction order below does not follow the round order one-to-one.
	.visible .func (.param .s32 __cudaretf__Z6Rand2Dj) _Z6Rand2Dj (.param .u32 __cudaparmf1__Z6Rand2Dj)
	{
	.reg .u32 %r<60>;
	.loc	21	175	0
$LDWbegin__Z6Rand2Dj:
	// %r2 = seed
	ld.param.u32 	%r1, [__cudaparmf1__Z6Rand2Dj];
	mov.s32 	%r2, %r1;
	.loc	21	143	0
	// %r10 = y (global Y index), %r12 = x (global X index)
	cvt.s32.u32 	%r3, %ctaid.y;
	cvt.s32.u32 	%r4, %ntid.y;
	mul.lo.s32 	%r5, %r3, %r4;
	cvt.s32.u32 	%r6, %ctaid.x;
	cvt.s32.u32 	%r7, %ntid.x;
	mul.lo.s32 	%r8, %r6, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r5, %r9;
	mov.u32 	%r11, %tid.x;
	add.u32 	%r12, %r8, %r11;
	// round 1: a = (seed - x - y) ^ (y >> 13)    -> %r17
	shr.u32 	%r13, %r10, 13;
	sub.u32 	%r14, %r2, %r12;
	sub.u32 	%r15, %r12, %r10;
	sub.u32 	%r16, %r14, %r10;
	xor.b32 	%r17, %r13, %r16;
	// round 2: b = (x - y - a) ^ (a << 8)        -> %r21
	shl.b32 	%r18, %r17, 8;
	sub.u32 	%r19, %r15, %r17;
	sub.u32 	%r20, %r10, %r17;
	xor.b32 	%r21, %r18, %r19;
	// round 3: c = (y - a - b) ^ (b >> 13)       -> %r25
	shr.u32 	%r22, %r21, 13;
	sub.u32 	%r23, %r20, %r21;
	sub.u32 	%r24, %r17, %r21;
	xor.b32 	%r25, %r22, %r23;
	// round 4: a = (a - b - c) ^ (c >> 12)       -> %r28
	shr.u32 	%r26, %r25, 12;
	sub.u32 	%r27, %r24, %r25;
	xor.b32 	%r28, %r26, %r27;
	// round 5: b = (b - c - a) ^ (a << 16)       -> %r32
	sub.u32 	%r29, %r21, %r25;
	sub.u32 	%r30, %r29, %r28;
	shl.b32 	%r31, %r28, 16;
	xor.b32 	%r32, %r30, %r31;
	.loc	21	144	0
	// round 6: c = (c - a - b) ^ (b >> 5)        -> %r36
	sub.u32 	%r33, %r25, %r28;
	sub.u32 	%r34, %r33, %r32;
	shr.u32 	%r35, %r32, 5;
	xor.b32 	%r36, %r34, %r35;
	.loc	21	145	0
	// round 7: a = (a - b - c) ^ (c >> 3)        -> %r40
	sub.u32 	%r37, %r28, %r32;
	sub.u32 	%r38, %r37, %r36;
	shr.u32 	%r39, %r36, 3;
	xor.b32 	%r40, %r38, %r39;
	.loc	21	146	0
	// round 8: b = (b - c - a) ^ (a << 10)       -> %r44
	sub.u32 	%r41, %r32, %r36;
	sub.u32 	%r42, %r41, %r40;
	shl.b32 	%r43, %r40, 10;
	xor.b32 	%r44, %r42, %r43;
	.loc	21	147	0
	// round 9: c = (c - a - b) ^ (b >> 15)       -> %r48 (the mixed word m)
	sub.u32 	%r45, %r36, %r40;
	sub.u32 	%r46, %r45, %r44;
	shr.u32 	%r47, %r44, 15;
	xor.b32 	%r48, %r46, %r47;
	.loc	21	176	0
	// Final hash of m: byte 2 of (m*1103515245 + 12345) shifted left 7,
	// xor byte 2 of (m*-1029531031 - 740551042).
	mul.lo.u32 	%r49, %r48, 1103515245;
	add.u32 	%r50, %r49, 12345;
	shr.u32 	%r51, %r50, 16;
	and.b32 	%r52, %r51, 255;
	shl.b32 	%r53, %r52, 7;
	mul.lo.u32 	%r54, %r48, -1029531031;
	sub.u32 	%r55, %r54, 740551042;
	shr.u32 	%r56, %r55, 16;
	and.b32 	%r57, %r56, 255;
	xor.b32 	%r58, %r53, %r57;
	st.param.s32 	[__cudaretf__Z6Rand2Dj], %r58;
	ret;
$LDWend__Z6Rand2Dj:
	} // _Z6Rand2Dj

	// ushort4 Read2D<ushort4>(const ushort4 *base, int p2, int p3, int p4)
	// Returns the 8-byte element at index (p2 * p4 + p3) of the array at `base`.
	// NOTE(review): the argument roles (pitch / x / y) are not visible here --
	// confirm against the Read2D template in the source header.
	// The element is fetched with one generic-address vector load; no bounds
	// check is performed.
	.visible .func (.param .align 8 .b8 __cudaretf__Z6Read2DI7ushort4ET_PKS1_iii[8]) _Z6Read2DI7ushort4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii)
	{
	.reg .u32 %r<14>;
	.reg .u64 %rd<7>;
	.loc	21	114	0
$LDWbegin__Z6Read2DI7ushort4ET_PKS1_iii:
	// %rd2 = base, %r2 = p2, %r4 = p3, %r6 = p4
	ld.param.u64 	%rd1, [__cudaparmf1__Z6Read2DI7ushort4ET_PKS1_iii];
	mov.s64 	%rd2, %rd1;
	ld.param.u32 	%r1, [__cudaparmf2__Z6Read2DI7ushort4ET_PKS1_iii];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf3__Z6Read2DI7ushort4ET_PKS1_iii];
	mov.s32 	%r4, %r3;
	ld.param.u32 	%r5, [__cudaparmf4__Z6Read2DI7ushort4ET_PKS1_iii];
	mov.s32 	%r6, %r5;
	.loc	21	115	0
	// index = p2 * p4 + p3, computed in 32 bits (wraps on overflow)
	mul.lo.s32 	%r7, %r2, %r6;
	add.s32 	%r8, %r4, %r7;
	// %rd3 is not read again in this function; the widening that matters is
	// done by mul.wide below.
	cvt.s64.s32 	%rd3, %r8;
	// byte offset = sign-extended index * 8 (size of one ushort4)
	mul.wide.s32 	%rd4, %r8, 8;
	add.u64 	%rd5, %rd2, %rd4;
	// One 4 x u16 vector load; copy the components to the return slot in order.
	ld.v4.u16 	{%r9,%r10,%r11,%r12}, [%rd5+0];
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+0], %r9;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+2], %r10;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+4], %r11;
	st.param.u16 	[__cudaretf__Z6Read2DI7ushort4ET_PKS1_iii+6], %r12;
	ret;
$LDWend__Z6Read2DI7ushort4ET_PKS1_iii:
	} // _Z6Read2DI7ushort4ET_PKS1_iii

	// float4 Read2D<float4>(const float4 *base, int p2, int p3, int p4)
	// Returns the 16-byte element at index (p2 * p4 + p3) of the array at `base`.
	// NOTE(review): the argument roles (pitch / x / y) are not visible here --
	// confirm against the Read2D template in the source header.
	// The element is fetched with one generic-address vector load; no bounds
	// check is performed.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6Read2DI6float4ET_PKS1_iii[16]) _Z6Read2DI6float4ET_PKS1_iii (.param .u64 __cudaparmf1__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf2__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf3__Z6Read2DI6float4ET_PKS1_iii, .param .s32 __cudaparmf4__Z6Read2DI6float4ET_PKS1_iii)
	{
	.reg .u32 %r<10>;
	.reg .u64 %rd<7>;
	.reg .f32 %f<6>;
	.loc	21	114	0
$LDWbegin__Z6Read2DI6float4ET_PKS1_iii:
	// %rd2 = base, %r2 = p2, %r4 = p3, %r6 = p4
	ld.param.u64 	%rd1, [__cudaparmf1__Z6Read2DI6float4ET_PKS1_iii];
	mov.s64 	%rd2, %rd1;
	ld.param.u32 	%r1, [__cudaparmf2__Z6Read2DI6float4ET_PKS1_iii];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf3__Z6Read2DI6float4ET_PKS1_iii];
	mov.s32 	%r4, %r3;
	ld.param.u32 	%r5, [__cudaparmf4__Z6Read2DI6float4ET_PKS1_iii];
	mov.s32 	%r6, %r5;
	.loc	21	115	0
	// index = p2 * p4 + p3, computed in 32 bits (wraps on overflow)
	mul.lo.s32 	%r7, %r2, %r6;
	add.s32 	%r8, %r4, %r7;
	// %rd3 is not read again in this function; the widening that matters is
	// done by mul.wide below.
	cvt.s64.s32 	%rd3, %r8;
	// byte offset = sign-extended index * 16 (size of one float4)
	mul.wide.s32 	%rd4, %r8, 16;
	add.u64 	%rd5, %rd2, %rd4;
	// One 4 x f32 vector load; copy the components to the return slot in order.
	ld.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd5+0];
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+0], %f1;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+4], %f2;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+8], %f3;
	st.param.f32 	[__cudaretf__Z6Read2DI6float4ET_PKS1_iii+12], %f4;
	ret;
$LDWend__Z6Read2DI6float4ET_PKS1_iii:
	} // _Z6Read2DI6float4ET_PKS1_iii

	// void Write2D<ushort4>(ushort4 value, ushort4 *base, int p3, int p4, int p5)
	// Stores `value` (8 bytes) at index (p3 * p5 + p4) of the array at `base`.
	// NOTE(review): the argument roles (pitch / x / y) are not visible here --
	// confirm against the Write2D template in the source header.
	// The element is written with one generic-address vector store; no bounds
	// check is performed.
	.visible .func _Z7Write2DI7ushort4EvT_PS1_iii (.param .align 8 .b8 __cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii[8], .param .u64 __cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii)
	{
	.reg .u32 %r<18>;
	.reg .u64 %rd<7>;
	.loc	21	125	0
$LDWbegin__Z7Write2DI7ushort4EvT_PS1_iii:
	// value components 0..3 -> %r2, %r4, %r6, %r8
	ld.param.u16 	%r1, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+0];
	mov.s32 	%r2, %r1;
	ld.param.u16 	%r3, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+2];
	mov.s32 	%r4, %r3;
	ld.param.u16 	%r5, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+4];
	mov.s32 	%r6, %r5;
	ld.param.u16 	%r7, [__cudaparmf1__Z7Write2DI7ushort4EvT_PS1_iii+6];
	mov.s32 	%r8, %r7;
	// %rd2 = base, %r10 = p3, %r12 = p4, %r14 = p5
	ld.param.u64 	%rd1, [__cudaparmf2__Z7Write2DI7ushort4EvT_PS1_iii];
	mov.s64 	%rd2, %rd1;
	ld.param.u32 	%r9, [__cudaparmf3__Z7Write2DI7ushort4EvT_PS1_iii];
	mov.s32 	%r10, %r9;
	ld.param.u32 	%r11, [__cudaparmf4__Z7Write2DI7ushort4EvT_PS1_iii];
	mov.s32 	%r12, %r11;
	ld.param.u32 	%r13, [__cudaparmf5__Z7Write2DI7ushort4EvT_PS1_iii];
	mov.s32 	%r14, %r13;
	.loc	21	126	0
	// index = p3 * p5 + p4, computed in 32 bits (wraps on overflow)
	mul.lo.s32 	%r15, %r10, %r14;
	add.s32 	%r16, %r12, %r15;
	// %rd3 is not read again in this function; the widening that matters is
	// done by mul.wide below.
	cvt.s64.s32 	%rd3, %r16;
	// byte offset = sign-extended index * 8 (size of one ushort4)
	mul.wide.s32 	%rd4, %r16, 8;
	add.u64 	%rd5, %rd2, %rd4;
	st.v4.u16 	[%rd5+0], {%r2,%r4,%r6,%r8};
	.loc	21	127	0
	ret;
$LDWend__Z7Write2DI7ushort4EvT_PS1_iii:
	} // _Z7Write2DI7ushort4EvT_PS1_iii

	// void Write2D<float4>(float4 value, float4 *base, int p3, int p4, int p5)
	// Stores `value` (16 bytes) at index (p3 * p5 + p4) of the array at `base`.
	// NOTE(review): the argument roles (pitch / x / y) are not visible here --
	// confirm against the Write2D template in the source header.
	// The element is written with one generic-address vector store; no bounds
	// check is performed.
	.visible .func _Z7Write2DI6float4EvT_PS1_iii (.param .align 16 .b8 __cudaparmf1__Z7Write2DI6float4EvT_PS1_iii[16], .param .u64 __cudaparmf2__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf3__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf4__Z7Write2DI6float4EvT_PS1_iii, .param .s32 __cudaparmf5__Z7Write2DI6float4EvT_PS1_iii)
	{
	.reg .u32 %r<10>;
	.reg .u64 %rd<7>;
	.reg .f32 %f<10>;
	.loc	21	125	0
$LDWbegin__Z7Write2DI6float4EvT_PS1_iii:
	// value components 0..3 -> %f2, %f4, %f6, %f8
	ld.param.f32 	%f1, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z7Write2DI6float4EvT_PS1_iii+12];
	mov.f32 	%f8, %f7;
	// %rd2 = base, %r2 = p3, %r4 = p4, %r6 = p5
	ld.param.u64 	%rd1, [__cudaparmf2__Z7Write2DI6float4EvT_PS1_iii];
	mov.s64 	%rd2, %rd1;
	ld.param.u32 	%r1, [__cudaparmf3__Z7Write2DI6float4EvT_PS1_iii];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf4__Z7Write2DI6float4EvT_PS1_iii];
	mov.s32 	%r4, %r3;
	ld.param.u32 	%r5, [__cudaparmf5__Z7Write2DI6float4EvT_PS1_iii];
	mov.s32 	%r6, %r5;
	.loc	21	126	0
	// index = p3 * p5 + p4, computed in 32 bits (wraps on overflow)
	mul.lo.s32 	%r7, %r2, %r6;
	add.s32 	%r8, %r4, %r7;
	// %rd3 is not read again in this function; the widening that matters is
	// done by mul.wide below.
	cvt.s64.s32 	%rd3, %r8;
	// byte offset = sign-extended index * 16 (size of one float4)
	mul.wide.s32 	%rd4, %r8, 16;
	add.u64 	%rd5, %rd2, %rd4;
	st.v4.f32 	[%rd5+0], {%f2,%f4,%f6,%f8};
	.loc	21	127	0
	ret;
$LDWend__Z7Write2DI6float4EvT_PS1_iii:
	} // _Z7Write2DI6float4EvT_PS1_iii

	// PixelRGB UnpremultiplyPixel(PixelRGB)
	// Divides the three components at byte offsets 0, 4, 8 by the component at
	// offset 12 (presumably alpha -- confirm against PixelRGB.h) after clamping
	// that component to [0, 1].
	//   * If clamped alpha - 8e-6 <= 0, all four output components are 0.
	//   * Otherwise the first three are multiplied by an approximate reciprocal
	//     of the clamped alpha, and the clamped alpha is returned at offset 12.
	// All float ops use .ftz (subnormals flushed to zero).
	.visible .func (.param .align 16 .b8 __cudaretf__Z18UnpremultiplyPixel8PixelRGB[16]) _Z18UnpremultiplyPixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z18UnpremultiplyPixel8PixelRGB[16])
	{
	.reg .f32 %f<23>;
	.reg .pred %p<3>;
	.loc	5	206	0
$LDWbegin__Z18UnpremultiplyPixel8PixelRGB:
	// %f2, %f4, %f6 = components at +0, +4, +8; %f8 = component at +12
	ld.param.f32 	%f1, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z18UnpremultiplyPixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	5	208	0
	// %f9 = clamp(%f8, 0, 1); %f10 carries the value returned at offset 12
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Take the zero path when the clamped value is at or below the 8e-6 threshold.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_13_1282;
	.loc	5	213	0
	// Scale each component by the approximate reciprocal 1 / %f9.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	5	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	5	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_13_1026;
$Lt_13_1282:
	.loc	5	219	0
	// Zero path: every output component, including offset 12, becomes 0.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_13_1026:
	.loc	5	224	0
	// Results: +0 <- %f17, +4 <- %f16, +8 <- %f15, +12 <- %f10
	mov.f32 	%f18, %f17;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+0], %f18;
	mov.f32 	%f19, %f16;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+4], %f19;
	mov.f32 	%f20, %f15;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+8], %f20;
	mov.f32 	%f21, %f10;
	st.param.f32 	[__cudaretf__Z18UnpremultiplyPixel8PixelRGB+12], %f21;
	ret;
$LDWend__Z18UnpremultiplyPixel8PixelRGB:
	} // _Z18UnpremultiplyPixel8PixelRGB

	// float ToLinearColor(float x)
	// Sign-preserving power curve with exponent 2.2, evaluated as
	// ex2(2.2 * lg2(.)) using the approximate hardware lg2/ex2:
	//   x <  0 : -((-x) ^ 2.2)
	//   else   :    x  ^ 2.2      (this path is also taken when the x < 0
	//                              comparison is false for any other reason)
	.visible .func (.param .f32 __cudaretf__Z13ToLinearColorf) _Z13ToLinearColorf (.param .f32 __cudaparmf1__Z13ToLinearColorf)
	{
	.reg .f32 %f<15>;
	.reg .pred %p<3>;
	.loc	5	231	0
$LDWbegin__Z13ToLinearColorf:
	ld.param.f32 	%f1, [__cudaparmf1__Z13ToLinearColorf];
	mov.f32 	%f2, %f1;
	mov.f32 	%f3, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f3;
	@!%p1 bra 	$Lt_14_1026;
	.loc	5	234	0
	// Negative input: apply the curve to -x, then negate the result.
	neg.ftz.f32 	%f4, %f2;
	lg2.approx.ftz.f32 	%f5, %f4;
	mov.f32 	%f6, 0f400ccccd;     	// 2.2
	mul.ftz.f32 	%f7, %f5, %f6;
	ex2.approx.ftz.f32 	%f8, %f7;
	neg.ftz.f32 	%f9, %f8;
	bra.uni 	$LBB4__Z13ToLinearColorf;
$Lt_14_1026:
	.loc	5	236	0
	// Non-negative input: x ^ 2.2 directly.
	lg2.approx.ftz.f32 	%f10, %f2;
	mov.f32 	%f11, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f12, %f10, %f11;
	ex2.approx.ftz.f32 	%f9, %f12;
$LBB4__Z13ToLinearColorf:
	// %f9 holds the result from whichever path ran.
	mov.f32 	%f13, %f9;
	st.param.f32 	[__cudaretf__Z13ToLinearColorf], %f13;
	ret;
$LDWend__Z13ToLinearColorf:
	} // _Z13ToLinearColorf

	// float FromLinearColor(float x)
	// Sign-preserving power curve with exponent 0.454545 (the constant
	// 0f3ee8ba2e, approximately 1/2.2), evaluated as ex2(k * lg2(.)) using the
	// approximate hardware lg2/ex2:
	//   x <  0 : -((-x) ^ 0.454545)
	//   else   :    x  ^ 0.454545  (this path is also taken when the x < 0
	//                               comparison is false for any other reason)
	.visible .func (.param .f32 __cudaretf__Z15FromLinearColorf) _Z15FromLinearColorf (.param .f32 __cudaparmf1__Z15FromLinearColorf)
	{
	.reg .f32 %f<15>;
	.reg .pred %p<3>;
	.loc	5	239	0
$LDWbegin__Z15FromLinearColorf:
	ld.param.f32 	%f1, [__cudaparmf1__Z15FromLinearColorf];
	mov.f32 	%f2, %f1;
	mov.f32 	%f3, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f3;
	@!%p1 bra 	$Lt_15_1026;
	.loc	5	242	0
	// Negative input: apply the curve to -x, then negate the result.
	neg.ftz.f32 	%f4, %f2;
	lg2.approx.ftz.f32 	%f5, %f4;
	mov.f32 	%f6, 0f3ee8ba2e;     	// 0.454545
	mul.ftz.f32 	%f7, %f5, %f6;
	ex2.approx.ftz.f32 	%f8, %f7;
	neg.ftz.f32 	%f9, %f8;
	bra.uni 	$LBB4__Z15FromLinearColorf;
$Lt_15_1026:
	.loc	5	244	0
	// Non-negative input: x ^ 0.454545 directly.
	lg2.approx.ftz.f32 	%f10, %f2;
	mov.f32 	%f11, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f12, %f10, %f11;
	ex2.approx.ftz.f32 	%f9, %f12;
$LBB4__Z15FromLinearColorf:
	// %f9 holds the result from whichever path ran.
	mov.f32 	%f13, %f9;
	st.param.f32 	[__cudaretf__Z15FromLinearColorf], %f13;
	ret;
$LDWend__Z15FromLinearColorf:
	} // _Z15FromLinearColorf

	// PixelRGB PremultiplyLinearizePixel(PixelRGB)
	// For each of the three components at byte offsets 0, 4, 8: applies the
	// sign-preserving 2.2 power curve (inlined; see .loc 5 234/236) and
	// multiplies the result by the component at offset 12 clamped to [0, 1]
	// (presumably alpha -- confirm against PixelRGB.h). The clamped value is
	// returned at offset 12.
	.visible .func (.param .align 16 .b8 __cudaretf__Z25PremultiplyLinearizePixel8PixelRGB[16]) _Z25PremultiplyLinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	5	252	0
$LDWbegin__Z25PremultiplyLinearizePixel8PixelRGB:
	// %f2, %f4, %f6 = components at +0, +4, +8; %f8 = component at +12
	ld.param.f32 	%f1, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z25PremultiplyLinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	5	254	0
	// %f9 = clamp(%f8, 0, 1)
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	5	255	0
	// --- component at +0: %f16 = sign(x) * |x| ^ 2.2 ---
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_16_4098;
	.loc	5	234	0
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_193_5;
$Lt_16_4098:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_193_5:
	.loc	5	256	0
	// --- component at +4: %f26 = sign(x) * |x| ^ 2.2 ---
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_16_4610;
	.loc	5	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_193_3;
$Lt_16_4610:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_193_3:
	.loc	5	257	0
	// --- component at +8: %f36 = sign(x) * |x| ^ 2.2 ---
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_16_5122;
	.loc	5	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_193_1;
$Lt_16_5122:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_193_1:
	.loc	5	259	0
	// Scale the three curved components by the clamped value and return
	// +0 <- %f16 * %f9, +4 <- %f26 * %f9, +8 <- %f36 * %f9, +12 <- %f9.
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	mul.ftz.f32 	%f42, %f16, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z25PremultiplyLinearizePixel8PixelRGB+12], %f45;
	ret;
$LDWend__Z25PremultiplyLinearizePixel8PixelRGB:
	} // _Z25PremultiplyLinearizePixel8PixelRGB

	// PixelRGB UnpremultiplyUnlinearizePixel(PixelRGB)
	// Two inlined stages:
	//   1. Unpremultiply (.loc 5 208..219): clamp the component at offset 12
	//      to [0, 1] (presumably alpha -- confirm against PixelRGB.h); if it is
	//      at or below the 8e-6 threshold, all four components become 0,
	//      otherwise the components at +0, +4, +8 are multiplied by its
	//      approximate reciprocal.
	//   2. Unlinearize (.loc 5 242/244): apply the sign-preserving 0.454545
	//      power curve to each of the three scaled components.
	// The clamped (or zeroed) value is returned unchanged at offset 12.
	.visible .func (.param .align 16 .b8 __cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16]) _Z29UnpremultiplyUnlinearizePixel8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	5	263	0
$LDWbegin__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	// %f2, %f4, %f6 = components at +0, +4, +8; %f8 = component at +12
	ld.param.f32 	%f1, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	5	208	0
	// Stage 1: %f9 = clamp(%f8, 0, 1); %f10 carries the value returned at +12.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_17_5122;
	.loc	5	213	0
	// Scale by the approximate reciprocal: %f17 <- +0, %f16 <- +4, %f15 <- +8.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	5	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	5	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_17_4866;
$Lt_17_5122:
	.loc	5	219	0
	// Zero path: all four intermediate components become 0. The curve below is
	// still evaluated on these zeros.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_17_4866:
	.loc	5	266	0
	// Stage 2, component at +0: %f24 = sign(%f17) * |%f17| ^ 0.454545
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_17_5378;
	.loc	5	242	0
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_194_5;
$Lt_17_5378:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_194_5:
	.loc	5	267	0
	// Stage 2, component at +4: %f34 = sign(%f16) * |%f16| ^ 0.454545
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_17_5890;
	.loc	5	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_194_3;
$Lt_17_5890:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_194_3:
	.loc	5	268	0
	// Stage 2, component at +8: %f44 = sign(%f15) * |%f15| ^ 0.454545
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_17_6402;
	.loc	5	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_194_1;
$Lt_17_6402:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_194_1:
	.loc	5	269	0
	// Results: +0 <- %f24, +4 <- %f34, +8 <- %f44, +12 <- %f10
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z29UnpremultiplyUnlinearizePixel8PixelRGB+12], %f51;
	ret;
$LDWend__Z29UnpremultiplyUnlinearizePixel8PixelRGB:
	} // _Z29UnpremultiplyUnlinearizePixel8PixelRGB

	// _Z20PremultiplyLinearize6float4 -- mangled name of PremultiplyLinearize(float4).
	// Input : 16-byte param, read as four f32 components at byte offsets +0, +4, +8, +12.
	// Output: 16-byte return param, written as four f32 components at the same offsets.
	// The +12 component is saturated with cvt.sat (presumably alpha, judging by the
	// function name -- confirm against the CUDA source). Each of the +0/+4/+8 components
	// is raised to the power 2.2 with its sign preserved, i.e. sign(x) * ex2(lg2(|x|) * 2.2)
	// using the approximate lg2/ex2 instructions, and is then multiplied by the
	// saturated +12 component.
	.visible .func (.param .align 16 .b8 __cudaretf__Z20PremultiplyLinearize6float4[16]) _Z20PremultiplyLinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z20PremultiplyLinearize6float4[16])
	{
	.reg .f32 %f<47>;
	.reg .pred %p<5>;
	.loc	5	277	0
$LDWbegin__Z20PremultiplyLinearize6float4:
	// Unpack the argument: %f2 = +0, %f4 = +4, %f6 = +8, %f8 = +12.
	ld.param.f32 	%f1, [__cudaparmf1__Z20PremultiplyLinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z20PremultiplyLinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z20PremultiplyLinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z20PremultiplyLinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	5	254	0
	// %f9 = saturated +12 component; used as the multiplier for all three channels.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	.loc	5	255	0
	// --- +0 component: %f16 = sign(%f2) * |%f2|^2.2 ---
	mov.f32 	%f10, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p1, %f2, %f10;
	@!%p1 bra 	$Lt_18_4098;
	.loc	5	234	0
	// Negative input: take the power of the negated value, then negate the result.
	neg.ftz.f32 	%f11, %f2;
	lg2.approx.ftz.f32 	%f12, %f11;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f15, %f14;
	neg.ftz.f32 	%f16, %f15;
	bra.uni 	$LDWendi___log2f_195_5;
$Lt_18_4098:
	.loc	5	236	0
	// Non-negative input: ex2(lg2(x) * 2.2).
	lg2.approx.ftz.f32 	%f17, %f2;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f16, %f19;
$LDWendi___log2f_195_5:
	.loc	5	256	0
	// --- +4 component: %f26 = sign(%f4) * |%f4|^2.2 ---
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f4, %f20;
	@!%p2 bra 	$Lt_18_4610;
	.loc	5	234	0
	neg.ftz.f32 	%f21, %f4;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_195_3;
$Lt_18_4610:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f27, %f4;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_195_3:
	.loc	5	257	0
	// --- +8 component: %f36 = sign(%f6) * |%f6|^2.2 ---
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f6, %f30;
	@!%p3 bra 	$Lt_18_5122;
	.loc	5	234	0
	neg.ftz.f32 	%f31, %f6;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_195_1;
$Lt_18_5122:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f37, %f6;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_195_1:
	.loc	5	259	0
	// Multiply each converted channel by the saturated +12 component.
	mul.ftz.f32 	%f40, %f36, %f9;
	mul.ftz.f32 	%f41, %f26, %f9;
	.loc	5	278	0
	mul.ftz.f32 	%f42, %f16, %f9;
	// Pack the result: converted channels at +0/+4/+8, saturated +12 component at +12.
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+0], %f42;
	mov.f32 	%f43, %f41;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+4], %f43;
	mov.f32 	%f44, %f40;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+8], %f44;
	mov.f32 	%f45, %f9;
	st.param.f32 	[__cudaretf__Z20PremultiplyLinearize6float4+12], %f45;
	ret;
$LDWend__Z20PremultiplyLinearize6float4:
	} // _Z20PremultiplyLinearize6float4

	// _Z24UnpremultiplyUnlinearize6float4 -- mangled name of UnpremultiplyUnlinearize(float4).
	// Input : 16-byte param, read as four f32 components at byte offsets +0, +4, +8, +12.
	// Output: 16-byte return param, written as four f32 components at the same offsets.
	// Step 1: a = saturate(+12 component). If a - 8e-6 <= 0 all four outputs are forced
	//         to 0; otherwise the +0/+4/+8 components are multiplied by rcp.approx(a).
	// Step 2: each resulting channel is raised to the power 0.454545 with its sign
	//         preserved, i.e. sign(x) * ex2(lg2(|x|) * 0.454545) (approximate lg2/ex2).
	// NOTE(review): the +12 component is presumably alpha, going by the function name.
	.visible .func (.param .align 16 .b8 __cudaretf__Z24UnpremultiplyUnlinearize6float4[16]) _Z24UnpremultiplyUnlinearize6float4 (.param .align 16 .b8 __cudaparmf1__Z24UnpremultiplyUnlinearize6float4[16])
	{
	.reg .f32 %f<53>;
	.reg .pred %p<6>;
	.loc	5	284	0
$LDWbegin__Z24UnpremultiplyUnlinearize6float4:
	// Unpack the argument: %f2 = +0, %f4 = +4, %f6 = +8, %f8 = +12.
	ld.param.f32 	%f1, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z24UnpremultiplyUnlinearize6float4+12];
	mov.f32 	%f8, %f7;
	.loc	5	208	0
	// %f9 = saturated +12 component; %f10 is the value returned at +12.
	cvt.ftz.sat.f32.f32 	%f9, %f8;
	mov.f32 	%f10, %f9;
	// Threshold test: branch to the all-zero path when %f9 - 8e-6 <= 0.
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f12, %f13;
	@%p1 bra 	$Lt_19_5122;
	.loc	5	213	0
	// Divide the channels by %f9 via an approximate reciprocal:
	// %f15 <- +8 component, %f16 <- +4 component, %f17 <- +0 component.
	rcp.approx.ftz.f32 	%f14, %f9;
	mul.ftz.f32 	%f15, %f14, %f6;
	.loc	5	214	0
	mul.ftz.f32 	%f16, %f14, %f4;
	.loc	5	215	0
	mul.ftz.f32 	%f17, %f14, %f2;
	bra.uni 	$Lt_19_4866;
$Lt_19_5122:
	.loc	5	219	0
	// Below the threshold: zero every channel and the returned +12 component.
	mov.f32 	%f15, 0f00000000;    	// 0
	mov.f32 	%f16, 0f00000000;    	// 0
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
$Lt_19_4866:
	.loc	5	266	0
	// --- +0 channel: %f24 = sign(%f17) * |%f17|^0.454545 ---
	mov.f32 	%f18, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p2, %f17, %f18;
	@!%p2 bra 	$Lt_19_5378;
	.loc	5	242	0
	// Negative input: take the power of the negated value, then negate the result.
	neg.ftz.f32 	%f19, %f17;
	lg2.approx.ftz.f32 	%f20, %f19;
	mov.f32 	%f21, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f23, %f22;
	neg.ftz.f32 	%f24, %f23;
	bra.uni 	$LDWendi___log2f_196_5;
$Lt_19_5378:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f25, %f17;
	mov.f32 	%f26, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f24, %f27;
$LDWendi___log2f_196_5:
	.loc	5	267	0
	// --- +4 channel: %f34 = sign(%f16) * |%f16|^0.454545 ---
	mov.f32 	%f28, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f16, %f28;
	@!%p3 bra 	$Lt_19_5890;
	.loc	5	242	0
	neg.ftz.f32 	%f29, %f16;
	lg2.approx.ftz.f32 	%f30, %f29;
	mov.f32 	%f31, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f33, %f32;
	neg.ftz.f32 	%f34, %f33;
	bra.uni 	$LDWendi___log2f_196_3;
$Lt_19_5890:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f35, %f16;
	mov.f32 	%f36, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f34, %f37;
$LDWendi___log2f_196_3:
	.loc	5	268	0
	// --- +8 channel: %f44 = sign(%f15) * |%f15|^0.454545 ---
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f15, %f38;
	@!%p4 bra 	$Lt_19_6402;
	.loc	5	242	0
	neg.ftz.f32 	%f39, %f15;
	lg2.approx.ftz.f32 	%f40, %f39;
	mov.f32 	%f41, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f43, %f42;
	neg.ftz.f32 	%f44, %f43;
	bra.uni 	$LDWendi___log2f_196_1;
$Lt_19_6402:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f45, %f15;
	mov.f32 	%f46, 0f3ee8ba2e;    	// 0.454545
	mul.ftz.f32 	%f47, %f45, %f46;
	ex2.approx.ftz.f32 	%f44, %f47;
$LDWendi___log2f_196_1:
	.loc	5	285	0
	// Pack the result: converted channels at +0/+4/+8, %f10 at +12.
	mov.f32 	%f48, %f24;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+0], %f48;
	mov.f32 	%f49, %f34;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+4], %f49;
	mov.f32 	%f50, %f44;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+8], %f50;
	mov.f32 	%f51, %f10;
	st.param.f32 	[__cudaretf__Z24UnpremultiplyUnlinearize6float4+12], %f51;
	ret;
$LDWend__Z24UnpremultiplyUnlinearize6float4:
	} // _Z24UnpremultiplyUnlinearize6float4
	// kRGB32f_To_601YPbPr: 36 bytes = nine f32 values stored as raw bytes. Decoded as
	// little-endian IEEE-754 single precision they are approximately:
	//   +0: 0.299      +4: 0.587      +8: 0.114
	//  +12: -0.168736 +16: -0.331264 +20: 0.5
	//  +24: 0.5       +28: -0.418688 +32: -0.081312
	// NOTE(review): presumably a row-major 3x3 RGB -> YPbPr (Rec. 601) matrix, going by
	// the symbol name; the functions in this part of the file only read +0/+4/+8.
	.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135,22,153,62,162,69,22,63,213,120,233,61,33,201,44,190,111,155,169,190,0,0,0,63,0,0,0,63,70,94,214,190,232,134,166,189};

	// _Z10GetLuma6018PixelRGB -- mangled name of GetLuma601(PixelRGB).
	// Reads the f32 components at +0, +4 and +8 of the 16-byte pixel argument (the +12
	// component is not read) and returns
	//   saturate(k[+0] * px[+8] + k[+4] * px[+4] + k[+8] * px[+0])
	// where k[] are the first three f32 entries of kRGB32f_To_601YPbPr. The products
	// are accumulated in the order +4, +0, +8 using mul followed by two fma.rn.
	.visible .func (.param .f32 __cudaretf__Z10GetLuma6018PixelRGB) _Z10GetLuma6018PixelRGB (.param .align 16 .b8 __cudaparmf1__Z10GetLuma6018PixelRGB[16])
	{
	.reg .f32 %px0, %px4, %px8;	// pixel components by byte offset
	.reg .f32 %w0, %w4, %w8;	// table weights by byte offset
	.reg .f32 %sum, %luma;
	.loc	22	49	0
$LDWbegin__Z10GetLuma6018PixelRGB:
	ld.param.f32 	%px0, [__cudaparmf1__Z10GetLuma6018PixelRGB+0];
	ld.param.f32 	%px4, [__cudaparmf1__Z10GetLuma6018PixelRGB+4];
	ld.param.f32 	%px8, [__cudaparmf1__Z10GetLuma6018PixelRGB+8];
	.loc	22	50	0
	ld.const.f32 	%w4, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%sum, %w4, %px4;
	ld.const.f32 	%w0, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%sum, %w0, %px8, %sum;
	ld.const.f32 	%w8, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%sum, %w8, %px0, %sum;
	// Saturate the weighted sum before returning it.
	cvt.ftz.sat.f32.f32 	%luma, %sum;
	st.param.f32 	[__cudaretf__Z10GetLuma6018PixelRGB], %luma;
	ret;
$LDWend__Z10GetLuma6018PixelRGB:
	} // _Z10GetLuma6018PixelRGB
	// kRGB32f_To_709YPbPr: 36 bytes = nine f32 values stored as raw bytes. Decoded as
	// little-endian IEEE-754 single precision they are approximately:
	//   +0: 0.2126     +4: 0.7152     +8: 0.0722
	//  +12: -0.114572 +16: -0.385428 +20: 0.5
	//  +24: 0.5       +28: -0.454153 +32: -0.045847
	// NOTE(review): presumably a row-major 3x3 RGB -> YPbPr (Rec. 709) matrix, going by
	// the symbol name; the function that follows only reads +0/+4/+8.
	.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208,179,89,62,89,23,55,63,152,221,147,61,186,164,234,189,210,86,197,190,0,0,0,63,0,0,0,63,190,134,232,190,16,202,59,189};

	// _Z10GetLuma7098PixelRGB -- mangled name of GetLuma709(PixelRGB).
	// Reads the f32 components at +0, +4 and +8 of the 16-byte pixel argument (the +12
	// component is not read) and returns
	//   saturate(k[+0] * px[+8] + k[+4] * px[+4] + k[+8] * px[+0])
	// where k[] are the first three f32 entries of kRGB32f_To_709YPbPr. The products
	// are accumulated in the order +4, +0, +8 using mul followed by two fma.rn.
	.visible .func (.param .f32 __cudaretf__Z10GetLuma7098PixelRGB) _Z10GetLuma7098PixelRGB (.param .align 16 .b8 __cudaparmf1__Z10GetLuma7098PixelRGB[16])
	{
	.reg .f32 %px0, %px4, %px8;	// pixel components by byte offset
	.reg .f32 %w0, %w4, %w8;	// table weights by byte offset
	.reg .f32 %sum, %luma;
	.loc	22	54	0
$LDWbegin__Z10GetLuma7098PixelRGB:
	ld.param.f32 	%px0, [__cudaparmf1__Z10GetLuma7098PixelRGB+0];
	ld.param.f32 	%px4, [__cudaparmf1__Z10GetLuma7098PixelRGB+4];
	ld.param.f32 	%px8, [__cudaparmf1__Z10GetLuma7098PixelRGB+8];
	.loc	22	55	0
	ld.const.f32 	%w4, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%sum, %w4, %px4;
	ld.const.f32 	%w0, [kRGB32f_To_709YPbPr+0];
	fma.rn.ftz.f32 	%sum, %w0, %px8, %sum;
	ld.const.f32 	%w8, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%sum, %w8, %px0, %sum;
	// Saturate the weighted sum before returning it.
	cvt.ftz.sat.f32.f32 	%luma, %sum;
	st.param.f32 	[__cudaretf__Z10GetLuma7098PixelRGB], %luma;
	ret;
$LDWend__Z10GetLuma7098PixelRGB:
	} // _Z10GetLuma7098PixelRGB

	// _Z15LinearComposite8PixelRGBS_bf -- mangled name of
	// LinearComposite(PixelRGB, PixelRGB, bool, float).
	// Params : parmf1, parmf2 -- 16-byte pixels, each read as four f32 at +0/+4/+8/+12.
	//          parmf3         -- s32 flag, narrowed with cvt.s8.s32 and compared with 0.
	//          parmf4         -- f32 factor applied to the first pixel.
	// Returns: 16-byte pixel written as four f32 at +0/+4/+8/+12.
	// Register roles after unpacking:
	//   %f2/%f4/%f6/%f8     = first pixel  (+0/+4/+8/+12)
	//   %f10/%f12/%f14/%f16 = second pixel (+0/+4/+8/+12)
	//   %r2 = flag, %f18 = factor
	// Outline:
	//   1. Prepare the first pixel. flag != 0: scale all four components by the factor.
	//      flag == 0: saturate +12, scale it by the factor, saturate again, raise the
	//      three channels to the power 2.2 (sign preserved) and multiply them by it.
	//   2. %f53 = a1 + (1 - a1) * a2, with a1 = prepared first +12 component and
	//      a2 = saturated second +12 component. If %f53 - 8e-6 <= 0 return all zeros.
	//   3. Raise the second pixel's channels to the power 2.2, form
	//      (1 - a1) * a2 * second + first per channel, divide by saturate(%f53),
	//      then raise to the power 0.454545 (sign preserved).
	// NOTE(review): the +12 components look like alpha and the flag looks like "first
	// pixel is already linear and premultiplied" -- inferred from the data flow and the
	// symbol name only; confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z15LinearComposite8PixelRGBS_bf[16]) _Z15LinearComposite8PixelRGBS_bf (.param .align 16 .b8 __cudaparmf1__Z15LinearComposite8PixelRGBS_bf[16], .param .align 16 .b8 __cudaparmf2__Z15LinearComposite8PixelRGBS_bf[16], .param .s32 __cudaparmf3__Z15LinearComposite8PixelRGBS_bf, .param .f32 __cudaparmf4__Z15LinearComposite8PixelRGBS_bf)
	{
	.reg .u32 %r<6>;
	.reg .f32 %f<138>;
	.reg .pred %p<15>;
	.loc	22	66	0
$LDWbegin__Z15LinearComposite8PixelRGBS_bf:
	// Unpack the first pixel.
	ld.param.f32 	%f1, [__cudaparmf1__Z15LinearComposite8PixelRGBS_bf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z15LinearComposite8PixelRGBS_bf+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z15LinearComposite8PixelRGBS_bf+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z15LinearComposite8PixelRGBS_bf+12];
	mov.f32 	%f8, %f7;
	// Unpack the second pixel.
	ld.param.f32 	%f9, [__cudaparmf2__Z15LinearComposite8PixelRGBS_bf+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z15LinearComposite8PixelRGBS_bf+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z15LinearComposite8PixelRGBS_bf+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z15LinearComposite8PixelRGBS_bf+12];
	mov.f32 	%f16, %f15;
	// Flag (narrowed to 8 bits) and scale factor.
	ld.param.u32 	%r1, [__cudaparmf3__Z15LinearComposite8PixelRGBS_bf];
	cvt.s8.s32 	%r2, %r1;
	ld.param.f32 	%f17, [__cudaparmf4__Z15LinearComposite8PixelRGBS_bf];
	mov.f32 	%f18, %f17;
	// Only when flag == 0: saturate the first pixel's +12 component before scaling it.
	mov.u32 	%r3, 0;
	setp.ne.s32 	%p1, %r2, %r3;
	@%p1 bra 	$Lt_22_14850;
	.loc	22	69	0
	cvt.ftz.sat.f32.f32 	%f8, %f8;
$Lt_22_14850:
	.loc	22	72	0
	// Scale the first pixel's +12 component by the factor (both flag settings).
	mul.ftz.f32 	%f8, %f18, %f8;
	.loc	22	73	0
	// Saturate the second pixel's +12 component.
	cvt.ftz.sat.f32.f32 	%f16, %f16;
	mov.u32 	%r4, 0;
	setp.eq.s32 	%p2, %r2, %r4;
	@%p2 bra 	$Lt_22_15618;
	.loc	22	77	0
	// flag != 0: just scale the three channels of the first pixel by the factor.
	mul.ftz.f32 	%f2, %f2, %f18;
	.loc	22	78	0
	mul.ftz.f32 	%f4, %f18, %f4;
	.loc	22	79	0
	mul.ftz.f32 	%f6, %f18, %f6;
	bra.uni 	$Lt_22_15362;
$Lt_22_15618:
	.loc	5	255	0
	// flag == 0: raise each first-pixel channel to the power 2.2 with its sign
	// preserved. --- +0 channel -> %f25 ---
	mov.f32 	%f19, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f2, %f19;
	@!%p3 bra 	$Lt_22_15874;
	.loc	5	234	0
	neg.ftz.f32 	%f20, %f2;
	lg2.approx.ftz.f32 	%f21, %f20;
	mov.f32 	%f22, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f23, %f21, %f22;
	ex2.approx.ftz.f32 	%f24, %f23;
	neg.ftz.f32 	%f25, %f24;
	bra.uni 	$LDWendi___log2f_199_17;
$Lt_22_15874:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f26, %f2;
	mov.f32 	%f27, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f28, %f26, %f27;
	ex2.approx.ftz.f32 	%f25, %f28;
$LDWendi___log2f_199_17:
	.loc	5	256	0
	// --- +4 channel -> %f35 ---
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f4, %f29;
	@!%p4 bra 	$Lt_22_16386;
	.loc	5	234	0
	neg.ftz.f32 	%f30, %f4;
	lg2.approx.ftz.f32 	%f31, %f30;
	mov.f32 	%f32, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f33, %f31, %f32;
	ex2.approx.ftz.f32 	%f34, %f33;
	neg.ftz.f32 	%f35, %f34;
	bra.uni 	$LDWendi___log2f_199_15;
$Lt_22_16386:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f36, %f4;
	mov.f32 	%f37, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f38, %f36, %f37;
	ex2.approx.ftz.f32 	%f35, %f38;
$LDWendi___log2f_199_15:
	.loc	5	257	0
	// --- +8 channel -> %f45 ---
	mov.f32 	%f39, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f6, %f39;
	@!%p5 bra 	$Lt_22_16898;
	.loc	5	234	0
	neg.ftz.f32 	%f40, %f6;
	lg2.approx.ftz.f32 	%f41, %f40;
	mov.f32 	%f42, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f43, %f41, %f42;
	ex2.approx.ftz.f32 	%f44, %f43;
	neg.ftz.f32 	%f45, %f44;
	bra.uni 	$LDWendi___log2f_199_13;
$Lt_22_16898:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f46, %f6;
	mov.f32 	%f47, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f48, %f46, %f47;
	ex2.approx.ftz.f32 	%f45, %f48;
$LDWendi___log2f_199_13:
	.loc	22	83	0
	// Saturate the scaled +12 component and multiply the converted channels by it;
	// the saturated value also replaces %f8.
	cvt.ftz.sat.f32.f32 	%f49, %f8;
	mul.ftz.f32 	%f2, %f25, %f49;
	mul.ftz.f32 	%f4, %f35, %f49;
	mul.ftz.f32 	%f6, %f45, %f49;
	mov.f32 	%f8, %f49;
$Lt_22_15362:
	// Both paths merge here.
	// %f52 = (1 - %f8) * %f16, %f53 = %f52 + %f8 (combined +12 value).
	mov.f32 	%f50, 0f3f800000;    	// 1
	sub.ftz.f32 	%f51, %f50, %f8;
	mul.ftz.f32 	%f52, %f51, %f16;
	add.ftz.f32 	%f53, %f52, %f8;
	// If %f53 - 8e-6 <= 0 the result is four zeros; otherwise go composite.
	mov.f32 	%f54, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f55, %f53, %f54;
	mov.f32 	%f56, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p6, %f55, %f56;
	@!%p6 bra 	$Lt_22_17666;
	mov.f32 	%f57, 0f00000000;    	// 0
	mov.f32 	%f58, 0f00000000;    	// 0
	mov.f32 	%f59, 0f00000000;    	// 0
	mov.f32 	%f60, 0f00000000;    	// 0
	bra.uni 	$Lt_22_17410;
$Lt_22_17666:
	.loc	22	96	0
	// Raise each second-pixel channel to the power 2.2 with its sign preserved.
	// --- +0 channel -> %f67 ---
	mov.f32 	%f61, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p7, %f10, %f61;
	@!%p7 bra 	$Lt_22_17922;
	.loc	5	234	0
	neg.ftz.f32 	%f62, %f10;
	lg2.approx.ftz.f32 	%f63, %f62;
	mov.f32 	%f64, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f65, %f63, %f64;
	ex2.approx.ftz.f32 	%f66, %f65;
	neg.ftz.f32 	%f67, %f66;
	bra.uni 	$LDWendi___log2f_199_11;
$Lt_22_17922:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f68, %f10;
	mov.f32 	%f69, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f70, %f68, %f69;
	ex2.approx.ftz.f32 	%f67, %f70;
$LDWendi___log2f_199_11:
	.loc	22	97	0
	// --- +4 channel -> %f77 ---
	mov.f32 	%f71, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f12, %f71;
	@!%p8 bra 	$Lt_22_18434;
	.loc	5	234	0
	neg.ftz.f32 	%f72, %f12;
	lg2.approx.ftz.f32 	%f73, %f72;
	mov.f32 	%f74, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f75, %f73, %f74;
	ex2.approx.ftz.f32 	%f76, %f75;
	neg.ftz.f32 	%f77, %f76;
	bra.uni 	$LDWendi___log2f_199_9;
$Lt_22_18434:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f78, %f12;
	mov.f32 	%f79, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f80, %f78, %f79;
	ex2.approx.ftz.f32 	%f77, %f80;
$LDWendi___log2f_199_9:
	.loc	22	98	0
	// --- +8 channel -> %f87 ---
	mov.f32 	%f81, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p9, %f14, %f81;
	@!%p9 bra 	$Lt_22_18946;
	.loc	5	234	0
	neg.ftz.f32 	%f82, %f14;
	lg2.approx.ftz.f32 	%f83, %f82;
	mov.f32 	%f84, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f85, %f83, %f84;
	ex2.approx.ftz.f32 	%f86, %f85;
	neg.ftz.f32 	%f87, %f86;
	bra.uni 	$LDWendi___log2f_199_7;
$Lt_22_18946:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f88, %f14;
	mov.f32 	%f89, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f90, %f88, %f89;
	ex2.approx.ftz.f32 	%f87, %f90;
$LDWendi___log2f_199_7:
	.loc	5	208	0
	// %f91 = saturate(%f53); %f92 is the +12 value that will be returned.
	// The same 8e-6 threshold test is applied again to the saturated value.
	cvt.ftz.sat.f32.f32 	%f91, %f53;
	mov.f32 	%f92, %f91;
	mov.f32 	%f93, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f94, %f91, %f93;
	mov.f32 	%f95, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p10, %f94, %f95;
	@%p10 bra 	$Lt_22_19714;
	.loc	5	213	0
	// Per channel: (%f52 * second-pixel channel + first-pixel channel) * rcp(%f91).
	// %f98 <- +8 channel, %f100 <- +4 channel, %f102 <- +0 channel.
	rcp.approx.ftz.f32 	%f96, %f91;
	fma.rn.ftz.f32 	%f97, %f52, %f87, %f6;
	mul.ftz.f32 	%f98, %f96, %f97;
	.loc	5	214	0
	fma.rn.ftz.f32 	%f99, %f52, %f77, %f4;
	mul.ftz.f32 	%f100, %f96, %f99;
	.loc	5	215	0
	fma.rn.ftz.f32 	%f101, %f52, %f67, %f2;
	mul.ftz.f32 	%f102, %f96, %f101;
	bra.uni 	$Lt_22_19458;
$Lt_22_19714:
	.loc	5	219	0
	// Below the threshold: zero the channels and the returned +12 value.
	mov.f32 	%f98, 0f00000000;    	// 0
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f102, 0f00000000;   	// 0
	mov.f32 	%f92, 0f00000000;    	// 0
$Lt_22_19458:
	.loc	5	266	0
	// Raise each composited channel to the power 0.454545 with its sign preserved.
	// --- +0 channel -> %f109 ---
	mov.f32 	%f103, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p11, %f102, %f103;
	@!%p11 bra 	$Lt_22_19970;
	.loc	5	242	0
	neg.ftz.f32 	%f104, %f102;
	lg2.approx.ftz.f32 	%f105, %f104;
	mov.f32 	%f106, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f107, %f105, %f106;
	ex2.approx.ftz.f32 	%f108, %f107;
	neg.ftz.f32 	%f109, %f108;
	bra.uni 	$LDWendi___log2f_199_5;
$Lt_22_19970:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f110, %f102;
	mov.f32 	%f111, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f112, %f110, %f111;
	ex2.approx.ftz.f32 	%f109, %f112;
$LDWendi___log2f_199_5:
	.loc	5	267	0
	// --- +4 channel -> %f119 ---
	mov.f32 	%f113, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p12, %f100, %f113;
	@!%p12 bra 	$Lt_22_20482;
	.loc	5	242	0
	neg.ftz.f32 	%f114, %f100;
	lg2.approx.ftz.f32 	%f115, %f114;
	mov.f32 	%f116, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f117, %f115, %f116;
	ex2.approx.ftz.f32 	%f118, %f117;
	neg.ftz.f32 	%f119, %f118;
	bra.uni 	$LDWendi___log2f_199_3;
$Lt_22_20482:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f120, %f100;
	mov.f32 	%f121, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f122, %f120, %f121;
	ex2.approx.ftz.f32 	%f119, %f122;
$LDWendi___log2f_199_3:
	.loc	5	268	0
	// --- +8 channel -> %f129 ---
	mov.f32 	%f123, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p13, %f98, %f123;
	@!%p13 bra 	$Lt_22_20994;
	.loc	5	242	0
	neg.ftz.f32 	%f124, %f98;
	lg2.approx.ftz.f32 	%f125, %f124;
	mov.f32 	%f126, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f127, %f125, %f126;
	ex2.approx.ftz.f32 	%f128, %f127;
	neg.ftz.f32 	%f129, %f128;
	bra.uni 	$LDWendi___log2f_199_1;
$Lt_22_20994:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f130, %f98;
	mov.f32 	%f131, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f132, %f130, %f131;
	ex2.approx.ftz.f32 	%f129, %f132;
$LDWendi___log2f_199_1:
	.loc	22	101	0
	// Move the composited result into the registers shared with the all-zero path:
	// %f60 = +0, %f59 = +4, %f58 = +8, %f57 = +12.
	mov.f32 	%f60, %f109;
	mov.f32 	%f59, %f119;
	mov.f32 	%f58, %f129;
	mov.f32 	%f57, %f92;
$Lt_22_17410:
	.loc	22	103	0
	// Pack the return value.
	mov.f32 	%f133, %f60;
	st.param.f32 	[__cudaretf__Z15LinearComposite8PixelRGBS_bf+0], %f133;
	mov.f32 	%f134, %f59;
	st.param.f32 	[__cudaretf__Z15LinearComposite8PixelRGBS_bf+4], %f134;
	mov.f32 	%f135, %f58;
	st.param.f32 	[__cudaretf__Z15LinearComposite8PixelRGBS_bf+8], %f135;
	mov.f32 	%f136, %f57;
	st.param.f32 	[__cudaretf__Z15LinearComposite8PixelRGBS_bf+12], %f136;
	ret;
$LDWend__Z15LinearComposite8PixelRGBS_bf:
	} // _Z15LinearComposite8PixelRGBS_bf

	// _Z9ClipColor8PixelRGB -- mangled name of ClipColor(PixelRGB).
	// Input : 16-byte pixel, read as four f32 at +0/+4/+8/+12.
	// Output: 16-byte pixel; the +12 component is passed through unchanged.
	// With L = saturate(k[+0]*px[+8] + k[+4]*px[+4] + k[+8]*px[+0]) (k = first three
	// f32 of kRGB32f_To_601YPbPr), n = min and x = max of the three input channels:
	//   if n < 0: c = L + L * (c - L) / (L - n)        for each channel c
	//   if x > 1: c = L + (1 - L) * (c - L) / (x - L)  for each channel c
	// The second step works on the output of the first, but n, x and L are always
	// computed from the original input channels. The divisions use div.approx and
	// no check for a zero denominator is visible in this function.
	.visible .func (.param .align 16 .b8 __cudaretf__Z9ClipColor8PixelRGB[16]) _Z9ClipColor8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z9ClipColor8PixelRGB[16])
	{
	.reg .f32 %f<59>;
	.reg .pred %p<6>;
	.loc	22	112	0
$LDWbegin__Z9ClipColor8PixelRGB:
	// Unpack the argument: %f2 = +0, %f4 = +4, %f6 = +8, %f8 = +12.
	ld.param.f32 	%f1, [__cudaparmf1__Z9ClipColor8PixelRGB+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z9ClipColor8PixelRGB+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z9ClipColor8PixelRGB+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z9ClipColor8PixelRGB+12];
	mov.f32 	%f8, %f7;
	.loc	22	113	0
	// Working copies that become the result: %f9 = +0, %f10 = +4, %f11 = +8.
	mov.f32 	%f9, %f2;
	mov.f32 	%f10, %f4;
	mov.f32 	%f11, %f6;
	.loc	22	116	0
	// %f13 = minimum of the three input channels (compare + select).
	setp.gt.ftz.f32 	%p1, %f4, %f2;
	selp.f32 	%f12, %f2, %f4, %p1;
	setp.lt.ftz.f32 	%p2, %f12, %f6;
	selp.f32 	%f13, %f12, %f6, %p2;
	// Skip the first adjustment unless the minimum is negative.
	mov.f32 	%f14, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f13, %f14;
	@!%p3 bra 	$Lt_23_4354;
	.loc	22	119	0
	// %f21 = L (saturated weighted sum of the input channels).
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f16, %f15, %f4;
	ld.const.f32 	%f17, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f18, %f17, %f6, %f16;
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f20, %f19, %f2, %f18;
	cvt.ftz.sat.f32.f32 	%f21, %f20;
	// %f22 = L - n, the shared denominator; then c = L + L * (c - L) / (L - n).
	sub.ftz.f32 	%f22, %f21, %f13;
	sub.ftz.f32 	%f23, %f6, %f21;
	mul.ftz.f32 	%f24, %f21, %f23;
	div.approx.ftz.f32 	%f25, %f24, %f22;
	add.ftz.f32 	%f11, %f21, %f25;
	.loc	22	120	0
	sub.ftz.f32 	%f26, %f4, %f21;
	mul.ftz.f32 	%f27, %f21, %f26;
	div.approx.ftz.f32 	%f28, %f27, %f22;
	add.ftz.f32 	%f10, %f21, %f28;
	.loc	22	121	0
	sub.ftz.f32 	%f29, %f2, %f21;
	mul.ftz.f32 	%f30, %f21, %f29;
	div.approx.ftz.f32 	%f31, %f30, %f22;
	add.ftz.f32 	%f9, %f21, %f31;
$Lt_23_4354:
	// %f33 = maximum of the three ORIGINAL input channels.
	max.ftz.f32 	%f32, %f4, %f2;
	max.ftz.f32 	%f33, %f32, %f6;
	// Skip the second adjustment unless the maximum exceeds 1.
	mov.f32 	%f34, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p4, %f33, %f34;
	@!%p4 bra 	$Lt_23_4866;
	.loc	22	125	0
	// %f41 = L, recomputed from the original input channels.
	ld.const.f32 	%f35, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f36, %f35, %f4;
	ld.const.f32 	%f37, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f38, %f37, %f6, %f36;
	ld.const.f32 	%f39, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f40, %f39, %f2, %f38;
	cvt.ftz.sat.f32.f32 	%f41, %f40;
	// %f43 = 1 - L, %f44 = x - L; then c = L + (1 - L) * (c - L) / (x - L),
	// applied to the working copies (which may already have been adjusted above).
	mov.f32 	%f42, 0f3f800000;    	// 1
	sub.ftz.f32 	%f43, %f42, %f41;
	sub.ftz.f32 	%f44, %f33, %f41;
	sub.ftz.f32 	%f45, %f11, %f41;
	mul.ftz.f32 	%f46, %f43, %f45;
	div.approx.ftz.f32 	%f47, %f46, %f44;
	add.ftz.f32 	%f11, %f41, %f47;
	.loc	22	126	0
	sub.ftz.f32 	%f48, %f10, %f41;
	mul.ftz.f32 	%f49, %f43, %f48;
	div.approx.ftz.f32 	%f50, %f49, %f44;
	add.ftz.f32 	%f10, %f41, %f50;
	.loc	22	127	0
	sub.ftz.f32 	%f51, %f9, %f41;
	mul.ftz.f32 	%f52, %f43, %f51;
	div.approx.ftz.f32 	%f53, %f52, %f44;
	add.ftz.f32 	%f9, %f41, %f53;
$Lt_23_4866:
	.loc	22	129	0
	// Pack the result; the +12 component is the untouched input value.
	mov.f32 	%f54, %f9;
	st.param.f32 	[__cudaretf__Z9ClipColor8PixelRGB+0], %f54;
	mov.f32 	%f55, %f10;
	st.param.f32 	[__cudaretf__Z9ClipColor8PixelRGB+4], %f55;
	mov.f32 	%f56, %f11;
	st.param.f32 	[__cudaretf__Z9ClipColor8PixelRGB+8], %f56;
	mov.f32 	%f57, %f8;
	st.param.f32 	[__cudaretf__Z9ClipColor8PixelRGB+12], %f57;
	ret;
$LDWend__Z9ClipColor8PixelRGB:
	} // _Z9ClipColor8PixelRGB

	// _Z3MinIfET_S0_S0_ -- mangled name of Min<float>(float, float).
	// Returns the second argument when (first > second) holds, otherwise the first.
	// Implemented as compare + select rather than min.ftz.f32.
	.visible .func (.param .f32 __cudaretf__Z3MinIfET_S0_S0_) _Z3MinIfET_S0_S0_ (.param .f32 __cudaparmf1__Z3MinIfET_S0_S0_, .param .f32 __cudaparmf2__Z3MinIfET_S0_S0_)
	{
	.reg .f32 %lhs, %rhs, %least;
	.reg .pred %lhs_gt_rhs;
	.loc	23	36	0
$LDWbegin__Z3MinIfET_S0_S0_:
	ld.param.f32 	%lhs, [__cudaparmf1__Z3MinIfET_S0_S0_];
	ld.param.f32 	%rhs, [__cudaparmf2__Z3MinIfET_S0_S0_];
	.loc	23	37	0
	setp.gt.ftz.f32 	%lhs_gt_rhs, %lhs, %rhs;
	// Pick %rhs when the predicate is true, %lhs otherwise.
	selp.f32 	%least, %rhs, %lhs, %lhs_gt_rhs;
	st.param.f32 	[__cudaretf__Z3MinIfET_S0_S0_], %least;
	ret;
$LDWend__Z3MinIfET_S0_S0_:
	} // _Z3MinIfET_S0_S0_

	// _Z3MaxIfET_S0_S0_ -- mangled name of Max<float>(float, float).
	// Returns max.ftz.f32(first argument, second argument).
	.visible .func (.param .f32 __cudaretf__Z3MaxIfET_S0_S0_) _Z3MaxIfET_S0_S0_ (.param .f32 __cudaparmf1__Z3MaxIfET_S0_S0_, .param .f32 __cudaparmf2__Z3MaxIfET_S0_S0_)
	{
	.reg .f32 %lhs, %rhs, %greatest;
	.loc	23	54	0
$LDWbegin__Z3MaxIfET_S0_S0_:
	ld.param.f32 	%lhs, [__cudaparmf1__Z3MaxIfET_S0_S0_];
	ld.param.f32 	%rhs, [__cudaparmf2__Z3MaxIfET_S0_S0_];
	.loc	23	55	0
	max.ftz.f32 	%greatest, %lhs, %rhs;
	st.param.f32 	[__cudaretf__Z3MaxIfET_S0_S0_], %greatest;
	ret;
$LDWend__Z3MaxIfET_S0_S0_:
	} // _Z3MaxIfET_S0_S0_

	// _Z7SetLuma8PixelRGBf -- mangled name of SetLuma(PixelRGB, float).
	// Params : parmf1 -- 16-byte pixel read as four f32 at +0/+4/+8/+12;
	//          parmf2 -- f32 target value.
	// Returns: 16-byte pixel; the +12 component is passed through unchanged.
	// Steps:
	//   1. L0 = saturate(k[+0]*px[+8] + k[+4]*px[+4] + k[+8]*px[+0]), with k = first
	//      three f32 of kRGB32f_To_601YPbPr; d = target - L0; every channel += d.
	//   2. L = the same saturated weighted sum of the shifted channels; n = their
	//      minimum, x = their maximum.
	//   3. if n < 0: c = L + L * (c - L) / (L - n)        for each channel c
	//      if x > 1: c = (1 - L) * (c - L) / (x - L) + L  for each channel c
	//      The second adjustment works on the output of the first. Divisions use
	//      div.approx and no zero-denominator check is visible in this function.
	.visible .func (.param .align 16 .b8 __cudaretf__Z7SetLuma8PixelRGBf[16]) _Z7SetLuma8PixelRGBf (.param .align 16 .b8 __cudaparmf1__Z7SetLuma8PixelRGBf[16], .param .f32 __cudaparmf2__Z7SetLuma8PixelRGBf)
	{
	.reg .f32 %f<62>;
	.reg .pred %p<6>;
	.loc	22	134	0
$LDWbegin__Z7SetLuma8PixelRGBf:
	// Unpack: %f2 = +0, %f4 = +4, %f6 = +8, %f8 = +12, %f10 = target value.
	ld.param.f32 	%f1, [__cudaparmf1__Z7SetLuma8PixelRGBf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z7SetLuma8PixelRGBf+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z7SetLuma8PixelRGBf+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z7SetLuma8PixelRGBf+12];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z7SetLuma8PixelRGBf];
	mov.f32 	%f10, %f9;
	.loc	22	113	0
	// %f17 = L0, the saturated weighted sum of the input channels.
	// The three weights stay in %f11/%f13/%f15 and are reused below.
	ld.const.f32 	%f11, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f12, %f4, %f11;
	ld.const.f32 	%f13, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f14, %f13, %f6, %f12;
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f16, %f15, %f2, %f14;
	cvt.ftz.sat.f32.f32 	%f17, %f16;
	// %f18 = d = target - L0; shift every channel by d.
	// %f19/%f21/%f23 keep the shifted values; %f20/%f22/%f24 are the working copies
	// that become the result (+0/+4/+8).
	sub.ftz.f32 	%f18, %f10, %f17;
	add.ftz.f32 	%f19, %f18, %f2;
	mov.f32 	%f20, %f19;
	add.ftz.f32 	%f21, %f18, %f4;
	mov.f32 	%f22, %f21;
	add.ftz.f32 	%f23, %f18, %f6;
	mov.f32 	%f24, %f23;
	.loc	22	50	0
	// %f28 = L, the saturated weighted sum of the shifted channels.
	mul.ftz.f32 	%f25, %f21, %f11;
	fma.rn.ftz.f32 	%f26, %f13, %f23, %f25;
	fma.rn.ftz.f32 	%f27, %f15, %f19, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	.loc	22	116	0
	// %f30 = minimum of the shifted channels (compare + select).
	setp.gt.ftz.f32 	%p1, %f21, %f19;
	selp.f32 	%f29, %f19, %f21, %p1;
	setp.lt.ftz.f32 	%p2, %f29, %f23;
	selp.f32 	%f30, %f29, %f23, %p2;
	// Skip the first adjustment unless the minimum is negative.
	mov.f32 	%f31, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f30, %f31;
	@!%p3 bra 	$Lt_26_4354;
	.loc	22	119	0
	// %f32 = L - n; then c = L + L * (c - L) / (L - n) for each channel.
	sub.ftz.f32 	%f32, %f28, %f30;
	sub.ftz.f32 	%f33, %f23, %f28;
	mul.ftz.f32 	%f34, %f28, %f33;
	div.approx.ftz.f32 	%f35, %f34, %f32;
	add.ftz.f32 	%f24, %f28, %f35;
	.loc	22	120	0
	sub.ftz.f32 	%f36, %f21, %f28;
	mul.ftz.f32 	%f37, %f28, %f36;
	div.approx.ftz.f32 	%f38, %f37, %f32;
	add.ftz.f32 	%f22, %f28, %f38;
	.loc	22	121	0
	sub.ftz.f32 	%f39, %f19, %f28;
	mul.ftz.f32 	%f40, %f28, %f39;
	div.approx.ftz.f32 	%f41, %f40, %f32;
	add.ftz.f32 	%f20, %f28, %f41;
$Lt_26_4354:
	// %f43 = maximum of the shifted (not yet adjusted) channels.
	max.ftz.f32 	%f42, %f21, %f19;
	max.ftz.f32 	%f43, %f42, %f23;
	// Skip the second adjustment unless the maximum exceeds 1.
	mov.f32 	%f44, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p4, %f43, %f44;
	@!%p4 bra 	$Lt_26_4866;
	.loc	27	529	0
	// %f46 = 1 - L, %f47 = x - L; then c = (1 - L) * (c - L) / (x - L) + L,
	// applied to the working copies (which may already have been adjusted above).
	mov.f32 	%f45, 0f3f800000;    	// 1
	sub.ftz.f32 	%f46, %f45, %f28;
	sub.ftz.f32 	%f47, %f43, %f28;
	sub.ftz.f32 	%f48, %f24, %f28;
	mul.ftz.f32 	%f49, %f46, %f48;
	div.approx.ftz.f32 	%f50, %f49, %f47;
	.loc	22	125	0
	add.ftz.f32 	%f24, %f50, %f28;
	.loc	27	529	0
	sub.ftz.f32 	%f51, %f22, %f28;
	mul.ftz.f32 	%f52, %f46, %f51;
	div.approx.ftz.f32 	%f53, %f52, %f47;
	.loc	22	126	0
	add.ftz.f32 	%f22, %f53, %f28;
	.loc	27	529	0
	sub.ftz.f32 	%f54, %f20, %f28;
	mul.ftz.f32 	%f55, %f46, %f54;
	div.approx.ftz.f32 	%f56, %f55, %f47;
	.loc	22	127	0
	add.ftz.f32 	%f20, %f56, %f28;
$Lt_26_4866:
	.loc	22	141	0
	// Pack the result; the +12 component is the untouched input value.
	mov.f32 	%f57, %f20;
	st.param.f32 	[__cudaretf__Z7SetLuma8PixelRGBf+0], %f57;
	mov.f32 	%f58, %f22;
	st.param.f32 	[__cudaretf__Z7SetLuma8PixelRGBf+4], %f58;
	mov.f32 	%f59, %f24;
	st.param.f32 	[__cudaretf__Z7SetLuma8PixelRGBf+8], %f59;
	mov.f32 	%f60, %f8;
	st.param.f32 	[__cudaretf__Z7SetLuma8PixelRGBf+12], %f60;
	ret;
$LDWend__Z7SetLuma8PixelRGBf:
	} // _Z7SetLuma8PixelRGBf

	// _Z13GetSaturation8PixelRGB -- mangled name of GetSaturation(PixelRGB).
	// Reads the f32 components at +0, +4 and +8 of the 16-byte pixel argument (the +12
	// component is not read) and returns saturate(largest - smallest) of the three.
	// The largest value is computed with max.ftz.f32, the smallest with compare + select.
	.visible .func (.param .f32 __cudaretf__Z13GetSaturation8PixelRGB) _Z13GetSaturation8PixelRGB (.param .align 16 .b8 __cudaparmf1__Z13GetSaturation8PixelRGB[16])
	{
	.reg .f32 %px0, %px4, %px8;
	.reg .f32 %lo01, %hi01, %lo, %hi, %range, %sat;
	.reg .pred %px0_lt_px4, %lo01_lt_px8;
	.loc	22	145	0
$LDWbegin__Z13GetSaturation8PixelRGB:
	ld.param.f32 	%px0, [__cudaparmf1__Z13GetSaturation8PixelRGB+0];
	ld.param.f32 	%px4, [__cudaparmf1__Z13GetSaturation8PixelRGB+4];
	ld.param.f32 	%px8, [__cudaparmf1__Z13GetSaturation8PixelRGB+8];
	.loc	22	146	0
	// Largest of the three components.
	max.ftz.f32 	%hi01, %px0, %px4;
	max.ftz.f32 	%hi, %px8, %hi01;
	// Smallest of the three components.
	setp.lt.ftz.f32 	%px0_lt_px4, %px0, %px4;
	selp.f32 	%lo01, %px0, %px4, %px0_lt_px4;
	setp.lt.ftz.f32 	%lo01_lt_px8, %lo01, %px8;
	selp.f32 	%lo, %lo01, %px8, %lo01_lt_px8;
	// Saturate the spread before returning it.
	sub.ftz.f32 	%range, %hi, %lo;
	cvt.ftz.sat.f32.f32 	%sat, %range;
	st.param.f32 	[__cudaretf__Z13GetSaturation8PixelRGB], %sat;
	ret;
$LDWend__Z13GetSaturation8PixelRGB:
	} // _Z13GetSaturation8PixelRGB

	// _Z13SetSaturation8PixelRGBf -- mangled name of SetSaturation(PixelRGB, float).
	// Params : parmf1 -- 16-byte pixel read as four f32 at +0/+4/+8/+12;
	//          parmf2 -- f32 value s.
	// Returns: 16-byte pixel; the +12 component is passed through unchanged.
	// The code finds which of the three channels is the minimum, which the maximum
	// and which lies in between, then writes:
	//   min channel = 0
	//   max channel = s
	//   mid channel = s * (mid - min) / (max - min)
	// When the guard "max > min" fails for the selected case, the max and mid
	// channels are written as 0 as well.
	// Register roles: %f2/%f4/%f6 = input +0/+4/+8, %f10 = s,
	//                 %f13 = max, %f14 = min,
	//                 %f18/%f19/%f23 = output +0/+4/+8.
	.visible .func (.param .align 16 .b8 __cudaretf__Z13SetSaturation8PixelRGBf[16]) _Z13SetSaturation8PixelRGBf (.param .align 16 .b8 __cudaparmf1__Z13SetSaturation8PixelRGBf[16], .param .f32 __cudaparmf2__Z13SetSaturation8PixelRGBf)
	{
	.reg .f32 %f<41>;
	.reg .pred %p<13>;
	.loc	22	151	0
$LDWbegin__Z13SetSaturation8PixelRGBf:
	ld.param.f32 	%f1, [__cudaparmf1__Z13SetSaturation8PixelRGBf+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z13SetSaturation8PixelRGBf+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z13SetSaturation8PixelRGBf+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z13SetSaturation8PixelRGBf+12];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z13SetSaturation8PixelRGBf];
	mov.f32 	%f10, %f9;
	.loc	22	154	0
	// %p1 = (+0 < +4), kept for the last case below.
	// %f13 = max of the three channels, %f14 = min of the three channels.
	setp.lt.ftz.f32 	%p1, %f2, %f4;
	max.ftz.f32 	%f11, %f2, %f4;
	selp.f32 	%f12, %f2, %f4, %p1;
	max.ftz.f32 	%f13, %f11, %f6;
	setp.lt.ftz.f32 	%p2, %f12, %f6;
	selp.f32 	%f14, %f12, %f6, %p2;
	// ===== Case group A: the minimum equals the +8 channel =====
	setp.eq.ftz.f32 	%p3, %f14, %f6;
	@!%p3 bra 	$Lt_28_11522;
	// A1: max equals the +4 channel, so +0 is the middle value.
	setp.eq.ftz.f32 	%p4, %f13, %f4;
	@!%p4 bra 	$Lt_28_12034;
	setp.gt.ftz.f32 	%p5, %f4, %f6;
	@!%p5 bra 	$Lt_28_12546;
	.loc	22	161	0
	// out+0 = s * (+0 - +8) / (+4 - +8); out+4 = s.
	sub.ftz.f32 	%f15, %f2, %f6;
	mul.ftz.f32 	%f16, %f10, %f15;
	sub.ftz.f32 	%f17, %f4, %f6;
	div.approx.ftz.f32 	%f18, %f16, %f17;
	.loc	22	162	0
	mov.f32 	%f19, %f10;
	bra.uni 	$Lt_28_12802;
$Lt_28_12546:
	.loc	22	166	0
	// Guard failed (+4 is not greater than +8): out+0 = out+4 = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	bra.uni 	$Lt_28_12802;
$Lt_28_12034:
	// A2: max is not the +4 channel; the code treats +0 as the maximum and +4 as
	// the middle value.
	setp.gt.ftz.f32 	%p6, %f2, %f6;
	@!%p6 bra 	$Lt_28_13058;
	.loc	22	173	0
	// out+4 = s * (+4 - +8) / (+0 - +8); out+0 = s.
	sub.ftz.f32 	%f20, %f4, %f6;
	mul.ftz.f32 	%f21, %f10, %f20;
	sub.ftz.f32 	%f22, %f2, %f6;
	div.approx.ftz.f32 	%f19, %f21, %f22;
	.loc	22	174	0
	mov.f32 	%f18, %f10;
	bra.uni 	$Lt_28_12802;
$Lt_28_13058:
	.loc	22	178	0
	// Guard failed (+0 is not greater than +8): out+0 = out+4 = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
$Lt_28_12802:
$Lt_28_11778:
	// Group A always writes 0 to the minimum channel (+8).
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_28_15362;
$Lt_28_11522:
	// The minimum is not the +8 channel.
	// %p7 = (min == +4 channel), %p8 = (max == +8 channel).
	setp.eq.ftz.f32 	%p7, %f14, %f4;
	setp.eq.ftz.f32 	%p8, %f13, %f6;
	@!%p8 bra 	$Lt_28_13570;
	// ===== Case group B: the maximum equals the +8 channel =====
	@!%p7 bra 	$Lt_28_14082;
	// B1: min is +4, so +0 is the middle value.
	setp.lt.ftz.f32 	%p9, %f4, %f6;
	@!%p9 bra 	$Lt_28_14594;
	.loc	22	191	0
	// out+0 = s * (+0 - +4) / (+8 - +4); out+8 = s.
	sub.ftz.f32 	%f24, %f2, %f4;
	mul.ftz.f32 	%f25, %f10, %f24;
	sub.ftz.f32 	%f26, %f6, %f4;
	div.approx.ftz.f32 	%f18, %f25, %f26;
	.loc	22	192	0
	mov.f32 	%f23, %f10;
	bra.uni 	$Lt_28_14338;
$Lt_28_14594:
	.loc	22	196	0
	// Guard failed (+4 is not less than +8): out+0 = out+8 = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_28_14338:
	// Minimum channel (+4) = 0.
	mov.f32 	%f19, 0f00000000;    	// 0
	bra.uni 	$Lt_28_15362;
$Lt_28_14082:
	// B2: min is neither +8 nor +4; the code treats +0 as the minimum and +4 as
	// the middle value.
	setp.lt.ftz.f32 	%p10, %f2, %f6;
	@!%p10 bra 	$Lt_28_15106;
	.loc	22	204	0
	// out+4 = s * (+4 - +0) / (+8 - +0); out+8 = s.
	sub.ftz.f32 	%f27, %f4, %f2;
	mul.ftz.f32 	%f28, %f10, %f27;
	sub.ftz.f32 	%f29, %f6, %f2;
	div.approx.ftz.f32 	%f19, %f28, %f29;
	.loc	22	205	0
	mov.f32 	%f23, %f10;
	bra.uni 	$Lt_28_14850;
$Lt_28_15106:
	.loc	22	209	0
	// Guard failed (+0 is not less than +8): out+4 = out+8 = 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
$Lt_28_14850:
	.loc	22	211	0
	// Minimum channel (+0) = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
	bra.uni 	$Lt_28_15362;
$Lt_28_13570:
	// ===== Case group C: +8 is neither the minimum nor the maximum (middle) =====
	@!%p7 bra 	$Lt_28_15618;
	// C1: min is +4; the code treats +0 as the maximum.
	setp.gt.ftz.f32 	%p11, %f2, %f4;
	@!%p11 bra 	$Lt_28_16130;
	.loc	22	220	0
	// out+8 = s * (+8 - +4) / (+0 - +4); out+0 = s.
	sub.ftz.f32 	%f30, %f6, %f4;
	mul.ftz.f32 	%f31, %f10, %f30;
	sub.ftz.f32 	%f32, %f2, %f4;
	div.approx.ftz.f32 	%f23, %f31, %f32;
	.loc	22	221	0
	mov.f32 	%f18, %f10;
	bra.uni 	$Lt_28_15874;
$Lt_28_16130:
	.loc	22	225	0
	// Guard failed (+0 is not greater than +4): out+0 = out+8 = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_28_15874:
	// Minimum channel (+4) = 0.
	mov.f32 	%f19, 0f00000000;    	// 0
	bra.uni 	$Lt_28_15362;
$Lt_28_15618:
	// C2: the code treats +0 as the minimum and +4 as the maximum; the guard
	// reuses %p1 = (+0 < +4) computed at the top.
	@!%p1 bra 	$Lt_28_16642;
	.loc	22	233	0
	// out+8 = s * (+8 - +0) / (+4 - +0); out+4 = s.
	sub.ftz.f32 	%f33, %f6, %f2;
	mul.ftz.f32 	%f34, %f10, %f33;
	sub.ftz.f32 	%f35, %f4, %f2;
	div.approx.ftz.f32 	%f23, %f34, %f35;
	.loc	22	234	0
	mov.f32 	%f19, %f10;
	bra.uni 	$Lt_28_16386;
$Lt_28_16642:
	.loc	22	238	0
	// Guard failed (+0 is not less than +4): out+4 = out+8 = 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
$Lt_28_16386:
	.loc	22	240	0
	// Minimum channel (+0) = 0.
	mov.f32 	%f18, 0f00000000;    	// 0
$Lt_28_15362:
$Lt_28_13314:
$Lt_28_11266:
	.loc	22	244	0
	// All cases merge here. Pack the result; +12 is the untouched input value.
	mov.f32 	%f36, %f18;
	st.param.f32 	[__cudaretf__Z13SetSaturation8PixelRGBf+0], %f36;
	mov.f32 	%f37, %f19;
	st.param.f32 	[__cudaretf__Z13SetSaturation8PixelRGBf+4], %f37;
	mov.f32 	%f38, %f23;
	st.param.f32 	[__cudaretf__Z13SetSaturation8PixelRGBf+8], %f38;
	mov.f32 	%f39, %f8;
	st.param.f32 	[__cudaretf__Z13SetSaturation8PixelRGBf+12], %f39;
	ret;
$LDWend__Z13SetSaturation8PixelRGBf:
	} // _Z13SetSaturation8PixelRGBf

	// _Z39BlendMode_ChannelFn_IR_BlendMode_Normalff -- mangled name of
	// BlendMode_ChannelFn_IR_BlendMode_Normal(float, float).
	// Returns the first argument unchanged; the second argument is never read.
	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff) _Z39BlendMode_ChannelFn_IR_BlendMode_Normalff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff)
	{
	.reg .f32 %first;
	.loc	22	341	0
$LDWbegin__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff:
	ld.param.f32 	%first, [__cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff];
	.loc	22	342	0
	st.param.f32 	[__cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff], %first;
	ret;
$LDWend__Z39BlendMode_ChannelFn_IR_BlendMode_Normalff:
	} // _Z39BlendMode_ChannelFn_IR_BlendMode_Normalff

	// _Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff -- mangled name of
	// BlendMode_ChannelFn_IR_BlendMode_Darken(float, float).
	// Returns min.ftz.f32(first argument, second argument).
	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff) _Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff)
	{
	.reg .f32 %first, %second, %darker;
	.loc	22	345	0
$LDWbegin__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff:
	ld.param.f32 	%first, [__cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff];
	ld.param.f32 	%second, [__cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff];
	.loc	22	346	0
	min.ftz.f32 	%darker, %first, %second;
	st.param.f32 	[__cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff], %darker;
	ret;
$LDWend__Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff:
	} // _Z39BlendMode_ChannelFn_IR_BlendMode_Darkenff

	// Returns max(x, y) of the two f32 parameters (flush-to-zero mode).
	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff) _Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff)
	{
	.reg .f32 %x, %y, %res;
	.loc	22	349	0
$LDWbegin__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff:
	ld.param.f32 	%x, [__cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff];
	ld.param.f32 	%y, [__cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff];
	.loc	22	350	0
	max.ftz.f32 	%res, %x, %y;
	st.param.f32 	[__cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff], %res;
	ret;
$LDWend__Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff:
	} // _Z40BlendMode_ChannelFn_IR_BlendMode_Lightenff

	// Returns x * y, limited from below by 0 and then from above by 1.
	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff) _Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff)
	{
	.reg .f32 %zero, %one, %x, %y, %res;
	.loc	22	353	0
$LDWbegin__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff:
	ld.param.f32 	%x, [__cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff];
	ld.param.f32 	%y, [__cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff];
	.loc	22	354	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mul.ftz.f32 	%res, %x, %y;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff], %res;
	ret;
$LDWend__Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff:
	} // _Z41BlendMode_ChannelFn_IR_BlendMode_Multiplyff

	// Returns min(param3, max(param1, param2)): the first parameter limited
	// from below by the second and from above by the third.
	.visible .func (.param .f32 __cudaretf__Z5ClampIfET_S0_S0_S0_) _Z5ClampIfET_S0_S0_S0_ (.param .f32 __cudaparmf1__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf2__Z5ClampIfET_S0_S0_S0_, .param .f32 __cudaparmf3__Z5ClampIfET_S0_S0_S0_)
	{
	.reg .f32 %val, %lower, %upper, %res;
	.loc	23	72	0
$LDWbegin__Z5ClampIfET_S0_S0_S0_:
	ld.param.f32 	%val, [__cudaparmf1__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%lower, [__cudaparmf2__Z5ClampIfET_S0_S0_S0_];
	ld.param.f32 	%upper, [__cudaparmf3__Z5ClampIfET_S0_S0_S0_];
	.loc	23	73	0
	max.ftz.f32 	%res, %val, %lower;
	min.ftz.f32 	%res, %upper, %res;
	st.param.f32 	[__cudaretf__Z5ClampIfET_S0_S0_S0_], %res;
	ret;
$LDWend__Z5ClampIfET_S0_S0_S0_:
	} // _Z5ClampIfET_S0_S0_S0_

	// Limits both parameters to [1e-7, 1], computes 1 - (1 - y) * (1 - x),
	// and limits the result to [0, 1] before returning it.
	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff) _Z39BlendMode_ChannelFn_IR_BlendMode_Screenff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff)
	{
	.reg .f32 %zero, %one, %eps, %x, %y, %inv_x, %inv_y, %res;
	.loc	22	357	0
$LDWbegin__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff:
	ld.param.f32 	%x, [__cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff];
	ld.param.f32 	%y, [__cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff];
	.loc	22	360	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%eps, 0f33d6bf95;    	// 1e-007
	// inv_y = 1 - min(max(y, eps), 1)
	max.ftz.f32 	%y, %y, %eps;
	min.ftz.f32 	%y, %y, %one;
	sub.ftz.f32 	%inv_y, %one, %y;
	// inv_x = 1 - min(max(x, eps), 1)
	max.ftz.f32 	%x, %x, %eps;
	min.ftz.f32 	%x, %x, %one;
	sub.ftz.f32 	%inv_x, %one, %x;
	// res = 1 - inv_y * inv_x, then limited to [0, 1]
	mul.ftz.f32 	%res, %inv_y, %inv_x;
	sub.ftz.f32 	%res, %one, %res;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff], %res;
	ret;
$LDWend__Z39BlendMode_ChannelFn_IR_BlendMode_Screenff:
	} // _Z39BlendMode_ChannelFn_IR_BlendMode_Screenff

	// Computes 1 - (1 - y) / x with y limited to [0, 1] and x limited to
	// [1e-7, 1] (so the approximate divide never sees a zero divisor), then
	// limits the result to [0, 1].
	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff) _Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff)
	{
	.reg .f32 %zero, %one, %eps, %x, %y, %inv_y, %res;
	.loc	22	363	0
$LDWbegin__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff:
	ld.param.f32 	%x, [__cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff];
	ld.param.f32 	%y, [__cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff];
	.loc	22	367	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%eps, 0f33d6bf95;    	// 1e-007
	// inv_y = 1 - min(max(y, 0), 1)
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	sub.ftz.f32 	%inv_y, %one, %y;
	// x = min(max(x, eps), 1)
	max.ftz.f32 	%x, %x, %eps;
	min.ftz.f32 	%x, %x, %one;
	// res = 1 - inv_y / x, then limited to [0, 1]
	div.approx.ftz.f32 	%res, %inv_y, %x;
	sub.ftz.f32 	%res, %one, %res;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff], %res;
	ret;
$LDWend__Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff:
	} // _Z42BlendMode_ChannelFn_IR_BlendMode_ColorBurnff

	// Limits both parameters to [0, 1] and branches on the limited second
	// parameter y:
	//   y <= 0.5 : y * (x + x)
	//   else     : 1 - ((1 - x) + (1 - x)) * (1 - y)
	// The selected value is limited to [0, 1] before being returned.
	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff) _Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff)
	{
	.reg .f32 %zero, %one, %half, %x, %y, %dbl, %inv_y, %res;
	.reg .pred %y_low;
	.loc	22	370	0
$LDWbegin__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff:
	ld.param.f32 	%x, [__cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff];
	ld.param.f32 	%y, [__cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff];
	.loc	22	373	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%half, 0f3f000000;   	// 0.5
	max.ftz.f32 	%y, %y, %zero;
	max.ftz.f32 	%x, %x, %zero;
	min.ftz.f32 	%y, %y, %one;
	min.ftz.f32 	%x, %x, %one;
	setp.le.ftz.f32 	%y_low, %y, %half;
	@!%y_low bra 	$Lt_36_4354;
	// y <= 0.5
	add.ftz.f32 	%dbl, %x, %x;
	mul.ftz.f32 	%res, %y, %dbl;
	bra.uni 	$Lt_36_4098;
$Lt_36_4354:
	// y > 0.5
	sub.ftz.f32 	%dbl, %one, %x;
	add.ftz.f32 	%dbl, %dbl, %dbl;
	sub.ftz.f32 	%inv_y, %one, %y;
	mul.ftz.f32 	%res, %dbl, %inv_y;
	sub.ftz.f32 	%res, %one, %res;
$Lt_36_4098:
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff], %res;
	ret;
$LDWend__Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff:
	} // _Z40BlendMode_ChannelFn_IR_BlendMode_Overlayff

	// Limits both parameters to [0, 1] and forms weight = (x + x) - 1.
	// Branching on the limited first parameter x:
	//   x <= 0.5 : delta = y - y * y
	//   else     : delta = sqrt(y) - y      (approximate square root)
	// Returns fma(weight, delta, y) limited to [0, 1].
	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff) _Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff)
	{
	.reg .f32 %zero, %one, %neg_one, %half, %x, %y, %weight, %delta, %res;
	.reg .pred %x_low;
	.loc	22	376	0
$LDWbegin__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff:
	ld.param.f32 	%x, [__cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff];
	ld.param.f32 	%y, [__cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff];
	.loc	22	380	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%neg_one, 0fbf800000;	// -1
	mov.f32 	%half, 0f3f000000;   	// 0.5
	max.ftz.f32 	%x, %x, %zero;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%x, %x, %one;
	min.ftz.f32 	%y, %y, %one;
	add.ftz.f32 	%weight, %x, %x;
	add.ftz.f32 	%weight, %weight, %neg_one;
	setp.le.ftz.f32 	%x_low, %x, %half;
	@!%x_low bra 	$Lt_37_4354;
	// x <= 0.5
	mul.ftz.f32 	%delta, %y, %y;
	sub.ftz.f32 	%delta, %y, %delta;
	bra.uni 	$Lt_37_4098;
$Lt_37_4354:
	// x > 0.5
	sqrt.approx.ftz.f32 	%delta, %y;
	sub.ftz.f32 	%delta, %delta, %y;
$Lt_37_4098:
	// Both paths finish with the same fused multiply-add, so it is issued once here.
	fma.rn.ftz.f32 	%res, %weight, %delta, %y;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff], %res;
	ret;
$LDWend__Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff:
	} // _Z42BlendMode_ChannelFn_IR_BlendMode_SoftLightff

	// Limits both parameters to [0, 1] and branches on the limited first
	// parameter x:
	//   x <= 0.5 : y * (x + x)
	//   else     : 1 - ((1 - x) + (1 - x)) * (1 - y)
	// The selected value is limited to [0, 1] before being returned.
	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff) _Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff)
	{
	.reg .f32 %zero, %one, %half, %x, %y, %dbl, %inv_y, %res;
	.reg .pred %x_low;
	.loc	22	383	0
$LDWbegin__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff:
	ld.param.f32 	%x, [__cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff];
	ld.param.f32 	%y, [__cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff];
	.loc	22	386	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%half, 0f3f000000;   	// 0.5
	max.ftz.f32 	%x, %x, %zero;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%x, %x, %one;
	min.ftz.f32 	%y, %y, %one;
	setp.le.ftz.f32 	%x_low, %x, %half;
	@!%x_low bra 	$Lt_38_4354;
	// x <= 0.5
	add.ftz.f32 	%dbl, %x, %x;
	mul.ftz.f32 	%res, %y, %dbl;
	bra.uni 	$Lt_38_4098;
$Lt_38_4354:
	// x > 0.5
	sub.ftz.f32 	%dbl, %one, %x;
	add.ftz.f32 	%dbl, %dbl, %dbl;
	sub.ftz.f32 	%inv_y, %one, %y;
	mul.ftz.f32 	%res, %dbl, %inv_y;
	sub.ftz.f32 	%res, %one, %res;
$Lt_38_4098:
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff], %res;
	ret;
$LDWend__Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff:
	} // _Z42BlendMode_ChannelFn_IR_BlendMode_HardLightff

	// Returns |x - y| for the two f32 parameters; no range limiting is applied.
	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff) _Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff)
	{
	.reg .f32 %x, %y, %res;
	.loc	22	389	0
$LDWbegin__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff:
	ld.param.f32 	%x, [__cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff];
	ld.param.f32 	%y, [__cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff];
	.loc	22	390	0
	sub.ftz.f32 	%res, %x, %y;
	abs.ftz.f32 	%res, %res;
	st.param.f32 	[__cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff], %res;
	ret;
$LDWend__Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff:
	} // _Z43BlendMode_ChannelFn_IR_BlendMode_Differenceff

	// Limits both parameters to [0, 1], computes (x + y) - y * (x + x),
	// and limits the result to [0, 1].
	.visible .func (.param .f32 __cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff) _Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff (.param .f32 __cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff, .param .f32 __cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff)
	{
	.reg .f32 %zero, %one, %x, %y, %sum, %cross, %res;
	.loc	22	393	0
$LDWbegin__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff:
	ld.param.f32 	%x, [__cudaparmf1__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff];
	ld.param.f32 	%y, [__cudaparmf2__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff];
	.loc	22	396	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	max.ftz.f32 	%x, %x, %zero;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%x, %x, %one;
	min.ftz.f32 	%y, %y, %one;
	add.ftz.f32 	%sum, %x, %y;
	add.ftz.f32 	%cross, %x, %x;
	mul.ftz.f32 	%cross, %y, %cross;
	sub.ftz.f32 	%res, %sum, %cross;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff], %res;
	ret;
$LDWend__Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff:
	} // _Z42BlendMode_ChannelFn_IR_BlendMode_Exclusionff

	// Limits both parameters to [0, 1] and returns (y - x) limited to [0, 1],
	// where x is the first parameter and y the second.
	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff) _Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff)
	{
	.reg .f32 %zero, %one, %x, %y, %res;
	.loc	22	399	0
$LDWbegin__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff:
	ld.param.f32 	%x, [__cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff];
	ld.param.f32 	%y, [__cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff];
	.loc	22	402	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	max.ftz.f32 	%x, %x, %zero;
	min.ftz.f32 	%x, %x, %one;
	sub.ftz.f32 	%res, %y, %x;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff], %res;
	ret;
$LDWend__Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff:
	} // _Z41BlendMode_ChannelFn_IR_BlendMode_Subtractff

	// Returns y / x (approximate divide) limited to [0, 1], with y limited to
	// [0, 1] and the divisor x limited to [1e-7, 1] beforehand.
	.visible .func (.param .f32 __cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff) _Z39BlendMode_ChannelFn_IR_BlendMode_Divideff (.param .f32 __cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff, .param .f32 __cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff)
	{
	.reg .f32 %zero, %one, %eps, %x, %y, %res;
	.loc	22	405	0
$LDWbegin__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff:
	ld.param.f32 	%x, [__cudaparmf1__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff];
	ld.param.f32 	%y, [__cudaparmf2__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff];
	.loc	22	408	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%eps, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	max.ftz.f32 	%x, %x, %eps;
	min.ftz.f32 	%x, %x, %one;
	div.approx.ftz.f32 	%res, %y, %x;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff], %res;
	ret;
$LDWend__Z39BlendMode_ChannelFn_IR_BlendMode_Divideff:
	} // _Z39BlendMode_ChannelFn_IR_BlendMode_Divideff

	// Returns y / (1 - x) (approximate divide) limited to [0, 1]. y is first
	// limited to [0, 1]; x is limited to [0, 0.99999] so the divisor stays
	// strictly positive.
	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff) _Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff)
	{
	.reg .f32 %zero, %one, %x_cap, %x, %y, %denom, %res;
	.loc	22	411	0
$LDWbegin__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff:
	ld.param.f32 	%x, [__cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff];
	ld.param.f32 	%y, [__cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff];
	.loc	22	415	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%x_cap, 0f3f7fff58;  	// 0.99999
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	max.ftz.f32 	%x, %x, %zero;
	min.ftz.f32 	%x, %x, %x_cap;
	sub.ftz.f32 	%denom, %one, %x;
	div.approx.ftz.f32 	%res, %y, %denom;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff], %res;
	ret;
$LDWend__Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff:
	} // _Z43BlendMode_ChannelFn_IR_BlendMode_ColorDodgeff

	// Returns x + y, limited from below by 0 and then from above by 1.
	.visible .func (.param .f32 __cudaretf__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff) _Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff (.param .f32 __cudaparmf1__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff, .param .f32 __cudaparmf2__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff)
	{
	.reg .f32 %zero, %one, %x, %y, %res;
	.loc	22	418	0
$LDWbegin__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff:
	ld.param.f32 	%x, [__cudaparmf1__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff];
	ld.param.f32 	%y, [__cudaparmf2__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff];
	.loc	22	419	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	add.ftz.f32 	%res, %x, %y;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff], %res;
	ret;
$LDWend__Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff:
	} // _Z47BlendMode_ChannelFn_IR_BlendMode_LinearDodgeAddff

	// Limits both parameters to [0, 1] and returns (x + y) + (-1), limited
	// to [0, 1].
	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff) _Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff)
	{
	.reg .f32 %zero, %one, %neg_one, %x, %y, %res;
	.loc	22	422	0
$LDWbegin__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff:
	ld.param.f32 	%x, [__cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff];
	ld.param.f32 	%y, [__cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff];
	.loc	22	425	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%neg_one, 0fbf800000;	// -1
	max.ftz.f32 	%x, %x, %zero;
	min.ftz.f32 	%x, %x, %one;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	add.ftz.f32 	%res, %x, %y;
	add.ftz.f32 	%res, %res, %neg_one;
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff], %res;
	ret;
$LDWend__Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff:
	} // _Z43BlendMode_ChannelFn_IR_BlendMode_LinearBurnff

	// Limits x (first parameter) to [1e-6, 0.999999] and y (second parameter)
	// to [0, 1], then branches on x:
	//   x <= 0.5 : 1 - (1 - y) / (x + x)
	//   else     : y / ((1 - x) + (1 - x))
	// Both divides are approximate; the selected value is limited to [0, 1].
	.visible .func (.param .f32 __cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff) _Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff (.param .f32 __cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff, .param .f32 __cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff)
	{
	.reg .f32 %zero, %one, %half, %x_floor, %x_cap, %x, %y, %numer, %denom, %res;
	.reg .pred %x_low;
	.loc	22	428	0
$LDWbegin__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff:
	ld.param.f32 	%x, [__cudaparmf1__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff];
	ld.param.f32 	%y, [__cudaparmf2__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff];
	.loc	22	431	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%half, 0f3f000000;   	// 0.5
	mov.f32 	%x_floor, 0f358637bd;	// 1e-006
	mov.f32 	%x_cap, 0f3f7fffef;  	// 0.999999
	max.ftz.f32 	%x, %x, %x_floor;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%x, %x, %x_cap;
	min.ftz.f32 	%y, %y, %one;
	setp.le.ftz.f32 	%x_low, %x, %half;
	@!%x_low bra 	$Lt_46_4866;
	.loc	22	433	0
	sub.ftz.f32 	%numer, %one, %y;
	add.ftz.f32 	%denom, %x, %x;
	div.approx.ftz.f32 	%res, %numer, %denom;
	sub.ftz.f32 	%res, %one, %res;
	bra.uni 	$LBB4__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff;
$Lt_46_4866:
	.loc	22	437	0
	sub.ftz.f32 	%denom, %one, %x;
	add.ftz.f32 	%denom, %denom, %denom;
	div.approx.ftz.f32 	%res, %y, %denom;
$LBB4__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff:
	// Both paths apply the same [0, 1] limit, so it is applied once here.
	max.ftz.f32 	%res, %res, %zero;
	min.ftz.f32 	%res, %res, %one;
	st.param.f32 	[__cudaretf__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff], %res;
	ret;
$LDWend__Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff:
	} // _Z43BlendMode_ChannelFn_IR_BlendMode_VividLightff

	// Limits both parameters to [0, 1] and returns ((x + x) + y) + (-1).
	// NOTE(review): the result is stored without a final [0, 1] limit, so it
	// can range over [-1, 2]; confirm against the originating source that
	// this is intended.
	.visible .func (.param .f32 __cudaretf__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff) _Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff (.param .f32 __cudaparmf1__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff, .param .f32 __cudaparmf2__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff)
	{
	.reg .f32 %zero, %one, %neg_one, %x, %y, %res;
	.loc	22	441	0
$LDWbegin__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff:
	ld.param.f32 	%x, [__cudaparmf1__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff];
	ld.param.f32 	%y, [__cudaparmf2__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff];
	.loc	22	444	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%neg_one, 0fbf800000;	// -1
	max.ftz.f32 	%x, %x, %zero;
	min.ftz.f32 	%x, %x, %one;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%y, %y, %one;
	add.ftz.f32 	%res, %x, %x;
	add.ftz.f32 	%res, %res, %y;
	add.ftz.f32 	%res, %res, %neg_one;
	st.param.f32 	[__cudaretf__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff], %res;
	ret;
$LDWend__Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff:
	} // _Z44BlendMode_ChannelFn_IR_BlendMode_LinearLightff

	// Limits both parameters to [0, 1], forms upper = x + x and
	// lower = upper + (-1), then returns:
	//   lower  if y < lower
	//   upper  if y > upper
	//   y      otherwise
	.visible .func (.param .f32 __cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff) _Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff (.param .f32 __cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff, .param .f32 __cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff)
	{
	.reg .f32 %zero, %one, %neg_one, %x, %y, %upper, %lower, %res;
	.reg .pred %below, %above;
	.loc	22	447	0
$LDWbegin__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff:
	ld.param.f32 	%x, [__cudaparmf1__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff];
	ld.param.f32 	%y, [__cudaparmf2__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff];
	.loc	22	450	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%neg_one, 0fbf800000;	// -1
	max.ftz.f32 	%x, %x, %zero;
	max.ftz.f32 	%y, %y, %zero;
	min.ftz.f32 	%x, %x, %one;
	min.ftz.f32 	%y, %y, %one;
	add.ftz.f32 	%upper, %x, %x;
	add.ftz.f32 	%lower, %upper, %neg_one;
	setp.lt.ftz.f32 	%below, %y, %lower;
	@!%below bra 	$Lt_48_3330;
	.loc	22	452	0
	mov.f32 	%res, %lower;
	bra.uni 	$LBB6__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff;
$Lt_48_3330:
	.loc	22	454	0
	setp.gt.ftz.f32 	%above, %y, %upper;
	@!%above bra 	$Lt_48_3586;
	.loc	22	456	0
	mov.f32 	%res, %upper;
	bra.uni 	$LBB6__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff;
$Lt_48_3586:
	.loc	22	460	0
	mov.f32 	%res, %y;
$LBB6__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff:
	st.param.f32 	[__cudaretf__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff], %res;
	ret;
$LDWend__Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff:
	} // _Z41BlendMode_ChannelFn_IR_BlendMode_PinLightff

	// Returns 0 when x < (1 - y) and 1 otherwise, where x is the first
	// parameter and y the second. The inputs are not range-limited.
	.visible .func (.param .f32 __cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff) _Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff (.param .f32 __cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff, .param .f32 __cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff)
	{
	.reg .f32 %zero, %one, %x, %y, %threshold, %res;
	.reg .pred %below;
	.loc	22	464	0
$LDWbegin__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff:
	ld.param.f32 	%x, [__cudaparmf1__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff];
	ld.param.f32 	%y, [__cudaparmf2__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff];
	.loc	22	465	0
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	sub.ftz.f32 	%threshold, %one, %y;
	setp.lt.ftz.f32 	%below, %x, %threshold;
	selp.f32 	%res, %zero, %one, %below;
	st.param.f32 	[__cudaretf__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff], %res;
	ret;
$LDWend__Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff:
	} // _Z40BlendMode_ChannelFn_IR_BlendMode_HardMixff

	// Combines two 16-byte parameters, each read as four f32 components
	// (a0..a3 from param 1, b0..b3 from param 2), using the f32 param 3.
	// The .s32 param 4 is never read.
	//
	//   w   = param3 * a3
	//   cov = (w + b3) - w * b3
	//
	// If cov + (-8e-6) <= 0, all four returned components are 0. Otherwise
	// component 3 is cov and components 0..2 are
	//   fma(b_i, 1 - w/cov, (w/cov) * mix_i)
	// where mix_i combines a_i weighted by (1 - b3) and a_i weighted by b3.
	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi)
	{
	.reg .f32 %a<4>;
	.reg .f32 %b<4>;
	.reg .f32 %out<4>;
	.reg .f32 %zero, %one, %bias, %w, %cov, %tmp, %inv_b3, %ratio, %rest, %mix;
	.reg .pred %empty;
	.loc	22	468	0
$LDWbegin__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi:
	ld.param.f32 	%a0, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+12];
	ld.param.f32 	%w, [__cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi];
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%bias, 0fb70637bd;   	// -8e-006
	// cov = (w + b3) - w * b3, with w = param3 * a3
	mul.ftz.f32 	%w, %w, %a3;
	add.ftz.f32 	%cov, %w, %b3;
	mul.ftz.f32 	%tmp, %w, %b3;
	sub.ftz.f32 	%cov, %cov, %tmp;
	// Start from an all-zero result and skip the blend when cov is
	// (within the bias) not positive.
	mov.f32 	%out0, %zero;
	mov.f32 	%out1, %zero;
	mov.f32 	%out2, %zero;
	mov.f32 	%out3, %zero;
	add.ftz.f32 	%tmp, %cov, %bias;
	setp.le.ftz.f32 	%empty, %tmp, %zero;
	@%empty bra 	$Lt_50_1026;
	mov.f32 	%out3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%tmp, %cov;
	mul.ftz.f32 	%ratio, %w, %tmp;
	sub.ftz.f32 	%rest, %one, %ratio;
	// component 0
	mul.ftz.f32 	%mix, %a0, %b3;
	fma.rn.ftz.f32 	%mix, %a0, %inv_b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out0, %b0, %rest, %mix;
	// component 1
	mul.ftz.f32 	%mix, %inv_b3, %a1;
	fma.rn.ftz.f32 	%mix, %a1, %b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out1, %b1, %rest, %mix;
	// component 2
	mul.ftz.f32 	%mix, %inv_b3, %a2;
	fma.rn.ftz.f32 	%mix, %a2, %b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out2, %b2, %rest, %mix;
$Lt_50_1026:
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+0], %out0;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+4], %out1;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+8], %out2;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi+12], %out3;
	ret;
$LDWend__Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi:
	} // _Z37BlendMode_PixelFn_IR_BlendMode_Normal8PixelRGBS_fi

	// Combines two 16-byte parameters, each read as four f32 components
	// (a0..a3 from param 1, b0..b3 from param 2), using the f32 param 3.
	// The .s32 param 4 is never read.
	//
	//   w   = param3 * a3
	//   cov = (w + b3) - w * b3
	//
	// If cov + (-8e-6) <= 0, all four returned components are 0. Otherwise
	// component 3 is cov and components 0..2 are
	//   fma(b_i, 1 - w/cov, (w/cov) * fma(a_i, 1 - b3, b3 * min(a_i, b_i)))
	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi)
	{
	.reg .f32 %a<4>;
	.reg .f32 %b<4>;
	.reg .f32 %out<4>;
	.reg .f32 %zero, %one, %bias, %w, %cov, %tmp, %inv_b3, %ratio, %rest, %mix;
	.reg .pred %empty;
	.loc	22	469	0
$LDWbegin__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi:
	ld.param.f32 	%a0, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+12];
	ld.param.f32 	%w, [__cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi];
	mov.f32 	%zero, 0f00000000;   	// 0
	mov.f32 	%one, 0f3f800000;    	// 1
	mov.f32 	%bias, 0fb70637bd;   	// -8e-006
	// cov = (w + b3) - w * b3, with w = param3 * a3
	mul.ftz.f32 	%w, %w, %a3;
	add.ftz.f32 	%cov, %w, %b3;
	mul.ftz.f32 	%tmp, %w, %b3;
	sub.ftz.f32 	%cov, %cov, %tmp;
	// Start from an all-zero result and skip the blend when cov is
	// (within the bias) not positive.
	mov.f32 	%out0, %zero;
	mov.f32 	%out1, %zero;
	mov.f32 	%out2, %zero;
	mov.f32 	%out3, %zero;
	add.ftz.f32 	%tmp, %cov, %bias;
	setp.le.ftz.f32 	%empty, %tmp, %zero;
	@%empty bra 	$Lt_51_2562;
	mov.f32 	%out3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%tmp, %cov;
	mul.ftz.f32 	%ratio, %w, %tmp;
	sub.ftz.f32 	%rest, %one, %ratio;
	// component 0
	min.ftz.f32 	%mix, %a0, %b0;
	mul.ftz.f32 	%mix, %b3, %mix;
	fma.rn.ftz.f32 	%mix, %a0, %inv_b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out0, %b0, %rest, %mix;
	// component 1
	min.ftz.f32 	%mix, %a1, %b1;
	mul.ftz.f32 	%mix, %b3, %mix;
	fma.rn.ftz.f32 	%mix, %a1, %inv_b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out1, %b1, %rest, %mix;
	// component 2
	min.ftz.f32 	%mix, %a2, %b2;
	mul.ftz.f32 	%mix, %b3, %mix;
	fma.rn.ftz.f32 	%mix, %a2, %inv_b3, %mix;
	mul.ftz.f32 	%mix, %ratio, %mix;
	fma.rn.ftz.f32 	%out2, %b2, %rest, %mix;
$Lt_51_2562:
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+0], %out0;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+4], %out1;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+8], %out2;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi+12], %out3;
	ret;
$LDWend__Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi:
	} // _Z37BlendMode_PixelFn_IR_BlendMode_Darken8PixelRGBS_fi

	// Lighten blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * max(a[c], b[c]))
	//              for c in {+0, +4, +8}
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t;
	.reg .f32 	%zero, %one;
	.reg .pred 	%skip;
	.loc	22	470	0
$LDWbegin__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$Lighten_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0.
	max.ftz.f32 	%t, %a0, %b0;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	max.ftz.f32 	%t, %a1, %b1;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	max.ftz.f32 	%t, %a2, %b2;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$Lighten_store:
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi:
	} // _Z38BlendMode_PixelFn_IR_BlendMode_Lighten8PixelRGBS_fi

	// Multiply blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   B   = min(max(a[c] * b[c], 0), 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t;
	.reg .f32 	%zero, %one;
	.reg .pred 	%skip;
	.loc	22	471	0
$LDWbegin__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$Multiply_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: product clamped to [0, 1], then mixed.
	mul.ftz.f32 	%t, %a0, %b0;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	mul.ftz.f32 	%t, %a1, %b1;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	mul.ftz.f32 	%t, %a2, %b2;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$Multiply_store:
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi:
	} // _Z39BlendMode_PixelFn_IR_BlendMode_Multiply8PixelRGBS_fi

	// Screen blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   cl(x) = min(max(x, 1e-7), 1)
	//   B   = min(max(1 - (1 - cl(b[c])) * (1 - cl(a[c])), 0), 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}   (the mix uses the unclamped a[c], b[c])
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t, %u;
	.reg .f32 	%zero, %one, %eps;
	.reg .pred 	%skip;
	.loc	22	472	0
$LDWbegin__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	mov.f32 	%eps, 0f33d6bf95;	// 1e-007
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$Screen_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: 1 - (1 - cl(b)) * (1 - cl(a)), clamped, then mixed.
	max.ftz.f32 	%t, %b0, %eps;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a0, %eps;
	min.ftz.f32 	%u, %u, %one;
	sub.ftz.f32 	%u, %one, %u;
	mul.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	max.ftz.f32 	%t, %b1, %eps;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a1, %eps;
	min.ftz.f32 	%u, %u, %one;
	sub.ftz.f32 	%u, %one, %u;
	mul.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	max.ftz.f32 	%t, %b2, %eps;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a2, %eps;
	min.ftz.f32 	%u, %u, %one;
	sub.ftz.f32 	%u, %one, %u;
	mul.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$Screen_store:
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi:
	} // _Z37BlendMode_PixelFn_IR_BlendMode_Screen8PixelRGBS_fi

	// Color-burn blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   num = 1 - min(max(b[c], 0), 1)
	//   den = min(max(a[c], 1e-7), 1)        (lower bound keeps den > 0)
	//   B   = min(max(1 - div.approx(num, den), 0), 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}   (the mix uses the unclamped a[c], b[c])
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t, %u;
	.reg .f32 	%zero, %one, %eps;
	.reg .pred 	%skip;
	.loc	22	473	0
$LDWbegin__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	mov.f32 	%eps, 0f33d6bf95;	// 1e-007
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$ColorBurn_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: 1 - (1 - clamp(b)) / clamp(a), clamped, then mixed.
	max.ftz.f32 	%t, %b0, %zero;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a0, %eps;
	min.ftz.f32 	%u, %u, %one;
	div.approx.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	max.ftz.f32 	%t, %b1, %zero;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a1, %eps;
	min.ftz.f32 	%u, %u, %one;
	div.approx.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	max.ftz.f32 	%t, %b2, %zero;
	min.ftz.f32 	%t, %t, %one;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%u, %a2, %eps;
	min.ftz.f32 	%u, %u, %one;
	div.approx.ftz.f32 	%t, %t, %u;
	sub.ftz.f32 	%t, %one, %t;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$ColorBurn_store:
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi:
	} // _Z40BlendMode_PixelFn_IR_BlendMode_ColorBurn8PixelRGBS_fi

	// Linear-burn blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   cl(x) = min(max(x, 0), 1)
	//   B   = cl(cl(a[c]) + cl(b[c]) - 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}   (the mix uses the unclamped a[c], b[c])
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t, %u;
	.reg .f32 	%zero, %one, %neg_one;
	.reg .pred 	%skip;
	.loc	22	474	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	mov.f32 	%neg_one, 0fbf800000;	// -1
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$LinearBurn_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: clamp(a) + clamp(b) - 1, clamped, then mixed.
	max.ftz.f32 	%t, %a0, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %b0, %zero;
	min.ftz.f32 	%u, %u, %one;
	add.ftz.f32 	%t, %t, %u;
	add.ftz.f32 	%t, %t, %neg_one;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	max.ftz.f32 	%t, %a1, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %b1, %zero;
	min.ftz.f32 	%u, %u, %one;
	add.ftz.f32 	%t, %t, %u;
	add.ftz.f32 	%t, %t, %neg_one;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	max.ftz.f32 	%t, %a2, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %b2, %zero;
	min.ftz.f32 	%u, %u, %one;
	add.ftz.f32 	%t, %t, %u;
	add.ftz.f32 	%t, %t, %neg_one;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$LinearBurn_store:
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_LinearBurn8PixelRGBS_fi

	// Color-dodge blend of two 16-byte pixels, each read as four f32 at byte
	// offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   num = min(max(b[c], 0), 1)
	//   den = 1 - min(max(a[c], 0), 0.99999)   (upper bound keeps den > 0)
	//   B   = min(max(div.approx(num, den), 0), 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}   (the mix uses the unclamped a[c], b[c])
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t, %u;
	.reg .f32 	%zero, %one, %cap;
	.reg .pred 	%skip;
	.loc	22	475	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	mov.f32 	%cap, 0f3f7fff58;	// 0.99999
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$ColorDodge_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: clamp(b) / (1 - clamp(a)), clamped, then mixed.
	max.ftz.f32 	%t, %b0, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %a0, %zero;
	min.ftz.f32 	%u, %u, %cap;
	sub.ftz.f32 	%u, %one, %u;
	div.approx.ftz.f32 	%t, %t, %u;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	max.ftz.f32 	%t, %b1, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %a1, %zero;
	min.ftz.f32 	%u, %u, %cap;
	sub.ftz.f32 	%u, %one, %u;
	div.approx.ftz.f32 	%t, %t, %u;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	max.ftz.f32 	%t, %b2, %zero;
	min.ftz.f32 	%t, %t, %one;
	max.ftz.f32 	%u, %a2, %zero;
	min.ftz.f32 	%u, %u, %cap;
	sub.ftz.f32 	%u, %one, %u;
	div.approx.ftz.f32 	%t, %t, %u;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$ColorDodge_store:
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_ColorDodge8PixelRGBS_fi

	// Linear-dodge (add) blend of two 16-byte pixels, each read as four f32
	// at byte offsets +0, +4, +8, +12.
	//   parm1 / parm2 : the two pixels (a = parm1, b = parm2 below)
	//   parm3         : f32 factor multiplied into a[+12]
	//   parm4         : s32, declared but never read by this function
	// Arithmetic performed (all .ftz):
	//   ka  = parm3 * a[+12]
	//   cov = ka + b[+12] - ka * b[+12]
	//   if (cov - 8e-6 <= 0) return {0, 0, 0, 0}
	//   w   = ka * rcp.approx(cov)
	//   B   = min(max(a[c] + b[c], 0), 1)
	//   out[c]   = b[c] * (1 - w) + w * (a[c] * (1 - b[+12]) + b[+12] * B)
	//              for c in {+0, +4, +8}
	//   out[+12] = cov
	// NOTE(review): the math treats offset +12 as an alpha/coverage value and
	// parm3 as an opacity -- inferred from the arithmetic only; confirm
	// against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16]) _Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi, .param .s32 __cudaparmf4__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi)
	{
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%o<4>;
	.reg .f32 	%k, %ka, %cov, %w, %inv_w, %inv_b3, %t;
	.reg .f32 	%zero, %one;
	.reg .pred 	%skip;
	.loc	22	476	0
$LDWbegin__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi:
	// Constants used on both paths.
	mov.f32 	%zero, 0f00000000;	// 0
	mov.f32 	%one, 0f3f800000;	// 1
	// Unpack both pixels and the scalar factor.
	ld.param.f32 	%a0, [__cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+0];
	ld.param.f32 	%a1, [__cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+4];
	ld.param.f32 	%a2, [__cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+8];
	ld.param.f32 	%a3, [__cudaparmf1__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+12];
	ld.param.f32 	%b0, [__cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+0];
	ld.param.f32 	%b1, [__cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+4];
	ld.param.f32 	%b2, [__cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+8];
	ld.param.f32 	%b3, [__cudaparmf2__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+12];
	ld.param.f32 	%k, [__cudaparmf3__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi];
	// cov = ka + b3 - ka*b3, with ka = k*a3.
	mul.ftz.f32 	%ka, %k, %a3;
	add.ftz.f32 	%cov, %ka, %b3;
	mul.ftz.f32 	%t, %ka, %b3;
	sub.ftz.f32 	%cov, %cov, %t;
	// Guard: when cov - 8e-6 <= 0 the result is all zeros (this also keeps
	// the reciprocal below away from a near-zero argument).
	mov.f32 	%t, 0fb70637bd;	// -8e-006
	add.ftz.f32 	%t, %cov, %t;
	setp.le.ftz.f32 	%skip, %t, %zero;
	mov.f32 	%o0, %zero;
	mov.f32 	%o1, %zero;
	mov.f32 	%o2, %zero;
	mov.f32 	%o3, %zero;
	@%skip bra 	$LinearDodgeAdd_store;
	// Non-degenerate path: offset +12 of the result carries cov.
	mov.f32 	%o3, %cov;
	sub.ftz.f32 	%inv_b3, %one, %b3;
	rcp.approx.ftz.f32 	%w, %cov;
	mul.ftz.f32 	%w, %ka, %w;
	sub.ftz.f32 	%inv_w, %one, %w;
	// Component at +0: sum clamped to [0, 1], then mixed.
	add.ftz.f32 	%t, %a0, %b0;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a0, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o0, %b0, %inv_w, %t;
	// Component at +4.
	add.ftz.f32 	%t, %a1, %b1;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a1, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o1, %b1, %inv_w, %t;
	// Component at +8.
	add.ftz.f32 	%t, %a2, %b2;
	max.ftz.f32 	%t, %t, %zero;
	min.ftz.f32 	%t, %t, %one;
	mul.ftz.f32 	%t, %b3, %t;
	fma.rn.ftz.f32 	%t, %a2, %inv_b3, %t;
	mul.ftz.f32 	%t, %w, %t;
	fma.rn.ftz.f32 	%o2, %b2, %inv_w, %t;
$LinearDodgeAdd_store:
	st.param.f32 	[__cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+0], %o0;
	st.param.f32 	[__cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+4], %o1;
	st.param.f32 	[__cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+8], %o2;
	st.param.f32 	[__cudaretf__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi+12], %o3;
	ret;
$LDWend__Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi:
	} // _Z45BlendMode_PixelFn_IR_BlendMode_LinearDodgeAdd8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_Overlay(PixelRGB, PixelRGB, float, int)  [demangled from the symbol]
	//
	// Inputs:
	//   parmf1 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f2,%f4,%f6,%f8
	//   parmf2 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f10,%f12,%f14,%f16
	//   parmf3 : f32 scalar                                        -> %f18
	//   parmf4 : s32, declared but never loaded in this body
	// Output: 16-byte struct, written as four f32 at +0,+4,+8,+12 of the return param.
	//
	// Computation, with a1 = parmf1[+12], a2 = parmf2[+12], k = parmf3:
	//   t = k*a1;  A = (t + a2) - t*a2
	//   if (A - 8e-6 <= 0) return {0,0,0,0}
	//   w = t * rcp(A)
	//   for each channel c at +0,+4,+8 (x = parmf1[c], y = parmf2[c]):
	//     xs = clamp(x,0,1);  ys = clamp(y,0,1)
	//     b  = (ys <= 0.5) ? ys*(2*xs) : 1 - (2*(1-xs))*(1-ys)
	//     m  = x*(1-a2) + a2*clamp(b,0,1)
	//     out[c] = y*(1-w) + w*m
	//   out[+12] = A
	// All arithmetic uses .ftz; the reciprocal is rcp.approx.
	// NOTE(review): +12 looks like an alpha channel and parmf3 like an opacity factor,
	// which would make parmf2 the backdrop pixel -- confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi)
	{
	.reg .f32 %f<117>;
	.reg .pred %p<6>;
	.loc	22	477	0
$LDWbegin__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi:
	// Unpack parmf1: x channels in %f2,%f4,%f6 and a1 in %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack parmf2: y channels in %f10,%f12,%f14 and a2 in %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// k = parmf3.
	ld.param.f32 	%f17, [__cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = t = k*a1;  %f22 = A = (t + a2) - t*a2;  %f23 = value returned at +12.
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// %p1 = (A - 8e-6 <= 0). When true, fall through and return all zeros,
	// which also keeps rcp(A) below away from a zero/negative A.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_59_12802;
	// Zero result: %f29,%f28,%f27 are the +0,+4,+8 outputs, %f23 the +12 output.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_59_12546;
$Lt_59_12802:
	// ---- channel +0: %f35 = ys = clamp(y,0,1), %f37 = xs = clamp(x,0,1) ----
	.loc	22	373	0
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f10, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f2, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f31, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f33, %f36;
	// Select the formula on ys <= 0.5.
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p2, %f35, %f38;
	@!%p2 bra 	$Lt_59_13314;
	// ys <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f39, %f37, %f37;
	mul.ftz.f32 	%f40, %f35, %f39;
	bra.uni 	$Lt_59_13058;
$Lt_59_13314:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f37;
	mov.f32 	%f43, 0f3f800000;    	// 1
	add.ftz.f32 	%f44, %f42, %f42;
	mov.f32 	%f45, 0f3f800000;    	// 1
	sub.ftz.f32 	%f46, %f45, %f35;
	mul.ftz.f32 	%f47, %f44, %f46;
	sub.ftz.f32 	%f40, %f43, %f47;
$Lt_59_13058:
	// Shared factors, reused by all three channels:
	//   %f49 = 1 - a2,  %f51 = w = t*rcp(A),  %f54 = 1 - w
	// (%f53 recomputes the same product as %f51.)
	.loc	22	477	0
	mov.f32 	%f48, 0f3f800000;    	// 1
	sub.ftz.f32 	%f49, %f48, %f16;
	rcp.approx.ftz.f32 	%f50, %f22;
	mul.ftz.f32 	%f51, %f19, %f50;
	mov.f32 	%f52, 0f3f800000;    	// 1
	mul.ftz.f32 	%f53, %f19, %f50;
	sub.ftz.f32 	%f54, %f52, %f53;
	// out[+0] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	mov.f32 	%f55, 0f00000000;    	// 0
	max.ftz.f32 	%f56, %f40, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	min.ftz.f32 	%f58, %f56, %f57;
	mul.ftz.f32 	%f59, %f16, %f58;
	fma.rn.ftz.f32 	%f60, %f2, %f49, %f59;
	mul.ftz.f32 	%f61, %f51, %f60;
	fma.rn.ftz.f32 	%f29, %f10, %f54, %f61;
	// ---- channel +4: %f67 = ys, %f69 = xs ----
	.loc	22	373	0
	mov.f32 	%f62, 0f00000000;    	// 0
	max.ftz.f32 	%f63, %f12, %f62;
	mov.f32 	%f64, 0f00000000;    	// 0
	max.ftz.f32 	%f65, %f4, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f63, %f66;
	mov.f32 	%f68, 0f3f800000;    	// 1
	min.ftz.f32 	%f69, %f65, %f68;
	mov.f32 	%f70, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p3, %f67, %f70;
	@!%p3 bra 	$Lt_59_13826;
	// ys <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f71, %f69, %f69;
	mul.ftz.f32 	%f72, %f67, %f71;
	bra.uni 	$Lt_59_13570;
$Lt_59_13826:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f73, 0f3f800000;    	// 1
	sub.ftz.f32 	%f74, %f73, %f69;
	mov.f32 	%f75, 0f3f800000;    	// 1
	add.ftz.f32 	%f76, %f74, %f74;
	mov.f32 	%f77, 0f3f800000;    	// 1
	sub.ftz.f32 	%f78, %f77, %f67;
	mul.ftz.f32 	%f79, %f76, %f78;
	sub.ftz.f32 	%f72, %f75, %f79;
$Lt_59_13570:
	// out[+4] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	477	0
	mov.f32 	%f80, 0f00000000;    	// 0
	max.ftz.f32 	%f81, %f72, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f81, %f82;
	mul.ftz.f32 	%f84, %f16, %f83;
	fma.rn.ftz.f32 	%f85, %f4, %f49, %f84;
	mul.ftz.f32 	%f86, %f51, %f85;
	fma.rn.ftz.f32 	%f28, %f12, %f54, %f86;
	// ---- channel +8: %f92 = ys, %f94 = xs ----
	.loc	22	373	0
	mov.f32 	%f87, 0f00000000;    	// 0
	max.ftz.f32 	%f88, %f14, %f87;
	mov.f32 	%f89, 0f00000000;    	// 0
	max.ftz.f32 	%f90, %f6, %f89;
	mov.f32 	%f91, 0f3f800000;    	// 1
	min.ftz.f32 	%f92, %f88, %f91;
	mov.f32 	%f93, 0f3f800000;    	// 1
	min.ftz.f32 	%f94, %f90, %f93;
	mov.f32 	%f95, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f92, %f95;
	@!%p4 bra 	$Lt_59_14338;
	// ys <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f96, %f94, %f94;
	mul.ftz.f32 	%f97, %f92, %f96;
	bra.uni 	$Lt_59_14082;
$Lt_59_14338:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f98, 0f3f800000;    	// 1
	sub.ftz.f32 	%f99, %f98, %f94;
	mov.f32 	%f100, 0f3f800000;   	// 1
	add.ftz.f32 	%f101, %f99, %f99;
	mov.f32 	%f102, 0f3f800000;   	// 1
	sub.ftz.f32 	%f103, %f102, %f92;
	mul.ftz.f32 	%f104, %f101, %f103;
	sub.ftz.f32 	%f97, %f100, %f104;
$Lt_59_14082:
	// out[+8] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	477	0
	mov.f32 	%f105, 0f00000000;   	// 0
	max.ftz.f32 	%f106, %f97, %f105;
	mov.f32 	%f107, 0f3f800000;   	// 1
	min.ftz.f32 	%f108, %f106, %f107;
	mul.ftz.f32 	%f109, %f16, %f108;
	fma.rn.ftz.f32 	%f110, %f6, %f49, %f109;
	mul.ftz.f32 	%f111, %f51, %f110;
	fma.rn.ftz.f32 	%f27, %f14, %f54, %f111;
$Lt_59_12546:
	// Join point of the zero path and the blend path: store %f29,%f28,%f27,%f23
	// to the return struct at +0,+4,+8,+12.
	mov.f32 	%f112, %f29;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+0], %f112;
	mov.f32 	%f113, %f28;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+4], %f113;
	mov.f32 	%f114, %f27;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+8], %f114;
	mov.f32 	%f115, %f23;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi+12], %f115;
	ret;
$LDWend__Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi:
	} // _Z38BlendMode_PixelFn_IR_BlendMode_Overlay8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_SoftLight(PixelRGB, PixelRGB, float, int)  [demangled from the symbol]
	//
	// Inputs:
	//   parmf1 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f2,%f4,%f6,%f8
	//   parmf2 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f10,%f12,%f14,%f16
	//   parmf3 : f32 scalar                                        -> %f18
	//   parmf4 : s32, declared but never loaded in this body
	// Output: 16-byte struct, written as four f32 at +0,+4,+8,+12 of the return param.
	//
	// Computation, with a1 = parmf1[+12], a2 = parmf2[+12], k = parmf3:
	//   t = k*a1;  A = (t + a2) - t*a2
	//   if (A - 8e-6 <= 0) return {0,0,0,0}
	//   w = t * rcp(A)
	//   for each channel c at +0,+4,+8 (x = parmf1[c], y = parmf2[c]):
	//     xs = clamp(x,0,1);  ys = clamp(y,0,1);  g = 2*xs - 1
	//     b  = (xs <= 0.5) ? g*(ys - ys*ys) + ys : g*(sqrt(ys) - ys) + ys
	//     m  = x*(1-a2) + a2*clamp(b,0,1)
	//     out[c] = y*(1-w) + w*m
	//   out[+12] = A
	// All arithmetic uses .ftz; the reciprocal and square root are the .approx forms.
	// NOTE(review): +12 looks like an alpha channel and parmf3 like an opacity factor,
	// which would make parmf2 the backdrop pixel -- confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi)
	{
	.reg .f32 %f<114>;
	.reg .pred %p<6>;
	.loc	22	478	0
$LDWbegin__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi:
	// Unpack parmf1: x channels in %f2,%f4,%f6 and a1 in %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack parmf2: y channels in %f10,%f12,%f14 and a2 in %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// k = parmf3.
	ld.param.f32 	%f17, [__cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = t = k*a1;  %f22 = A = (t + a2) - t*a2;  %f23 = value returned at +12.
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// %p1 = (A - 8e-6 <= 0). When true, fall through and return all zeros,
	// which also keeps rcp(A) below away from a zero/negative A.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_60_12802;
	// Zero result: %f29,%f28,%f27 are the +0,+4,+8 outputs, %f23 the +12 output.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_60_12546;
$Lt_60_12802:
	// ---- channel +0: %f35 = xs = clamp(x,0,1), %f37 = ys = clamp(y,0,1) ----
	.loc	22	380	0
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f10, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f31, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f33, %f36;
	// %f40 = g = (xs + xs) + (-1)
	add.ftz.f32 	%f38, %f35, %f35;
	mov.f32 	%f39, 0fbf800000;    	// -1
	add.ftz.f32 	%f40, %f38, %f39;
	// Select the formula on xs <= 0.5.
	mov.f32 	%f41, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p2, %f35, %f41;
	@!%p2 bra 	$Lt_60_13314;
	// xs <= 0.5:  b = g*(ys - ys*ys) + ys
	mul.ftz.f32 	%f42, %f37, %f37;
	sub.ftz.f32 	%f43, %f37, %f42;
	fma.rn.ftz.f32 	%f44, %f40, %f43, %f37;
	bra.uni 	$Lt_60_13058;
$Lt_60_13314:
	// otherwise:  b = g*(sqrt(ys) - ys) + ys
	sqrt.approx.ftz.f32 	%f45, %f37;
	sub.ftz.f32 	%f46, %f45, %f37;
	fma.rn.ftz.f32 	%f44, %f40, %f46, %f37;
$Lt_60_13058:
	// Shared factors, reused by all three channels:
	//   %f48 = 1 - a2,  %f50 = w = t*rcp(A),  %f53 = 1 - w
	// (%f52 recomputes the same product as %f50.)
	.loc	22	478	0
	mov.f32 	%f47, 0f3f800000;    	// 1
	sub.ftz.f32 	%f48, %f47, %f16;
	rcp.approx.ftz.f32 	%f49, %f22;
	mul.ftz.f32 	%f50, %f19, %f49;
	mov.f32 	%f51, 0f3f800000;    	// 1
	mul.ftz.f32 	%f52, %f19, %f49;
	sub.ftz.f32 	%f53, %f51, %f52;
	// out[+0] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	mov.f32 	%f54, 0f00000000;    	// 0
	max.ftz.f32 	%f55, %f44, %f54;
	mov.f32 	%f56, 0f3f800000;    	// 1
	min.ftz.f32 	%f57, %f55, %f56;
	mul.ftz.f32 	%f58, %f16, %f57;
	fma.rn.ftz.f32 	%f59, %f2, %f48, %f58;
	mul.ftz.f32 	%f60, %f50, %f59;
	fma.rn.ftz.f32 	%f29, %f10, %f53, %f60;
	// ---- channel +4: %f66 = xs, %f68 = ys, %f71 = g ----
	.loc	22	380	0
	mov.f32 	%f61, 0f00000000;    	// 0
	max.ftz.f32 	%f62, %f4, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f12, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f62, %f65;
	mov.f32 	%f67, 0f3f800000;    	// 1
	min.ftz.f32 	%f68, %f64, %f67;
	add.ftz.f32 	%f69, %f66, %f66;
	mov.f32 	%f70, 0fbf800000;    	// -1
	add.ftz.f32 	%f71, %f69, %f70;
	mov.f32 	%f72, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p3, %f66, %f72;
	@!%p3 bra 	$Lt_60_13826;
	// xs <= 0.5:  b = g*(ys - ys*ys) + ys
	mul.ftz.f32 	%f73, %f68, %f68;
	sub.ftz.f32 	%f74, %f68, %f73;
	fma.rn.ftz.f32 	%f75, %f71, %f74, %f68;
	bra.uni 	$Lt_60_13570;
$Lt_60_13826:
	// otherwise:  b = g*(sqrt(ys) - ys) + ys
	sqrt.approx.ftz.f32 	%f76, %f68;
	sub.ftz.f32 	%f77, %f76, %f68;
	fma.rn.ftz.f32 	%f75, %f71, %f77, %f68;
$Lt_60_13570:
	// out[+4] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	478	0
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f75, %f78;
	mov.f32 	%f80, 0f3f800000;    	// 1
	min.ftz.f32 	%f81, %f79, %f80;
	mul.ftz.f32 	%f82, %f16, %f81;
	fma.rn.ftz.f32 	%f83, %f4, %f48, %f82;
	mul.ftz.f32 	%f84, %f50, %f83;
	fma.rn.ftz.f32 	%f28, %f12, %f53, %f84;
	// ---- channel +8: %f90 = xs, %f92 = ys, %f95 = g ----
	.loc	22	380	0
	mov.f32 	%f85, 0f00000000;    	// 0
	max.ftz.f32 	%f86, %f6, %f85;
	mov.f32 	%f87, 0f00000000;    	// 0
	max.ftz.f32 	%f88, %f14, %f87;
	mov.f32 	%f89, 0f3f800000;    	// 1
	min.ftz.f32 	%f90, %f86, %f89;
	mov.f32 	%f91, 0f3f800000;    	// 1
	min.ftz.f32 	%f92, %f88, %f91;
	add.ftz.f32 	%f93, %f90, %f90;
	mov.f32 	%f94, 0fbf800000;    	// -1
	add.ftz.f32 	%f95, %f93, %f94;
	mov.f32 	%f96, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f90, %f96;
	@!%p4 bra 	$Lt_60_14338;
	// xs <= 0.5:  b = g*(ys - ys*ys) + ys
	mul.ftz.f32 	%f97, %f92, %f92;
	sub.ftz.f32 	%f98, %f92, %f97;
	fma.rn.ftz.f32 	%f99, %f95, %f98, %f92;
	bra.uni 	$Lt_60_14082;
$Lt_60_14338:
	// otherwise:  b = g*(sqrt(ys) - ys) + ys
	sqrt.approx.ftz.f32 	%f100, %f92;
	sub.ftz.f32 	%f101, %f100, %f92;
	fma.rn.ftz.f32 	%f99, %f95, %f101, %f92;
$Lt_60_14082:
	// out[+8] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	478	0
	mov.f32 	%f102, 0f00000000;   	// 0
	max.ftz.f32 	%f103, %f99, %f102;
	mov.f32 	%f104, 0f3f800000;   	// 1
	min.ftz.f32 	%f105, %f103, %f104;
	mul.ftz.f32 	%f106, %f16, %f105;
	fma.rn.ftz.f32 	%f107, %f6, %f48, %f106;
	mul.ftz.f32 	%f108, %f50, %f107;
	fma.rn.ftz.f32 	%f27, %f14, %f53, %f108;
$Lt_60_12546:
	// Join point of the zero path and the blend path: store %f29,%f28,%f27,%f23
	// to the return struct at +0,+4,+8,+12.
	mov.f32 	%f109, %f29;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+0], %f109;
	mov.f32 	%f110, %f28;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+4], %f110;
	mov.f32 	%f111, %f27;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+8], %f111;
	mov.f32 	%f112, %f23;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi+12], %f112;
	ret;
$LDWend__Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi:
	} // _Z40BlendMode_PixelFn_IR_BlendMode_SoftLight8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_HardLight(PixelRGB, PixelRGB, float, int)  [demangled from the symbol]
	//
	// Inputs:
	//   parmf1 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f2,%f4,%f6,%f8
	//   parmf2 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f10,%f12,%f14,%f16
	//   parmf3 : f32 scalar                                        -> %f18
	//   parmf4 : s32, declared but never loaded in this body
	// Output: 16-byte struct, written as four f32 at +0,+4,+8,+12 of the return param.
	//
	// Computation, with a1 = parmf1[+12], a2 = parmf2[+12], k = parmf3:
	//   t = k*a1;  A = (t + a2) - t*a2
	//   if (A - 8e-6 <= 0) return {0,0,0,0}
	//   w = t * rcp(A)
	//   for each channel c at +0,+4,+8 (x = parmf1[c], y = parmf2[c]):
	//     xs = clamp(x,0,1);  ys = clamp(y,0,1)
	//     b  = (xs <= 0.5) ? ys*(2*xs) : 1 - (2*(1-xs))*(1-ys)
	//     m  = x*(1-a2) + a2*clamp(b,0,1)
	//     out[c] = y*(1-w) + w*m
	//   out[+12] = A
	// The formula selector is the clamped parmf1 channel (xs).
	// All arithmetic uses .ftz; the reciprocal is rcp.approx.
	// NOTE(review): +12 looks like an alpha channel and parmf3 like an opacity factor,
	// which would make parmf2 the backdrop pixel -- confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi)
	{
	.reg .f32 %f<117>;
	.reg .pred %p<6>;
	.loc	22	479	0
$LDWbegin__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi:
	// Unpack parmf1: x channels in %f2,%f4,%f6 and a1 in %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack parmf2: y channels in %f10,%f12,%f14 and a2 in %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// k = parmf3.
	ld.param.f32 	%f17, [__cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = t = k*a1;  %f22 = A = (t + a2) - t*a2;  %f23 = value returned at +12.
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// %p1 = (A - 8e-6 <= 0). When true, fall through and return all zeros,
	// which also keeps rcp(A) below away from a zero/negative A.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_61_12802;
	// Zero result: %f29,%f28,%f27 are the +0,+4,+8 outputs, %f23 the +12 output.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_61_12546;
$Lt_61_12802:
	// ---- channel +0: %f35 = xs = clamp(x,0,1), %f37 = ys = clamp(y,0,1) ----
	.loc	22	386	0
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f10, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f31, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f33, %f36;
	// Select the formula on xs <= 0.5.
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p2, %f35, %f38;
	@!%p2 bra 	$Lt_61_13314;
	// xs <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f39, %f35, %f35;
	mul.ftz.f32 	%f40, %f37, %f39;
	bra.uni 	$Lt_61_13058;
$Lt_61_13314:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f35;
	mov.f32 	%f43, 0f3f800000;    	// 1
	add.ftz.f32 	%f44, %f42, %f42;
	mov.f32 	%f45, 0f3f800000;    	// 1
	sub.ftz.f32 	%f46, %f45, %f37;
	mul.ftz.f32 	%f47, %f44, %f46;
	sub.ftz.f32 	%f40, %f43, %f47;
$Lt_61_13058:
	// Shared factors, reused by all three channels:
	//   %f49 = 1 - a2,  %f51 = w = t*rcp(A),  %f54 = 1 - w
	// (%f53 recomputes the same product as %f51.)
	.loc	22	479	0
	mov.f32 	%f48, 0f3f800000;    	// 1
	sub.ftz.f32 	%f49, %f48, %f16;
	rcp.approx.ftz.f32 	%f50, %f22;
	mul.ftz.f32 	%f51, %f19, %f50;
	mov.f32 	%f52, 0f3f800000;    	// 1
	mul.ftz.f32 	%f53, %f19, %f50;
	sub.ftz.f32 	%f54, %f52, %f53;
	// out[+0] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	mov.f32 	%f55, 0f00000000;    	// 0
	max.ftz.f32 	%f56, %f40, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	min.ftz.f32 	%f58, %f56, %f57;
	mul.ftz.f32 	%f59, %f16, %f58;
	fma.rn.ftz.f32 	%f60, %f2, %f49, %f59;
	mul.ftz.f32 	%f61, %f51, %f60;
	fma.rn.ftz.f32 	%f29, %f10, %f54, %f61;
	// ---- channel +4: %f67 = xs, %f69 = ys ----
	.loc	22	386	0
	mov.f32 	%f62, 0f00000000;    	// 0
	max.ftz.f32 	%f63, %f4, %f62;
	mov.f32 	%f64, 0f00000000;    	// 0
	max.ftz.f32 	%f65, %f12, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f63, %f66;
	mov.f32 	%f68, 0f3f800000;    	// 1
	min.ftz.f32 	%f69, %f65, %f68;
	mov.f32 	%f70, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p3, %f67, %f70;
	@!%p3 bra 	$Lt_61_13826;
	// xs <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f71, %f67, %f67;
	mul.ftz.f32 	%f72, %f69, %f71;
	bra.uni 	$Lt_61_13570;
$Lt_61_13826:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f73, 0f3f800000;    	// 1
	sub.ftz.f32 	%f74, %f73, %f67;
	mov.f32 	%f75, 0f3f800000;    	// 1
	add.ftz.f32 	%f76, %f74, %f74;
	mov.f32 	%f77, 0f3f800000;    	// 1
	sub.ftz.f32 	%f78, %f77, %f69;
	mul.ftz.f32 	%f79, %f76, %f78;
	sub.ftz.f32 	%f72, %f75, %f79;
$Lt_61_13570:
	// out[+4] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	479	0
	mov.f32 	%f80, 0f00000000;    	// 0
	max.ftz.f32 	%f81, %f72, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f81, %f82;
	mul.ftz.f32 	%f84, %f16, %f83;
	fma.rn.ftz.f32 	%f85, %f4, %f49, %f84;
	mul.ftz.f32 	%f86, %f51, %f85;
	fma.rn.ftz.f32 	%f28, %f12, %f54, %f86;
	// ---- channel +8: %f92 = xs, %f94 = ys ----
	.loc	22	386	0
	mov.f32 	%f87, 0f00000000;    	// 0
	max.ftz.f32 	%f88, %f6, %f87;
	mov.f32 	%f89, 0f00000000;    	// 0
	max.ftz.f32 	%f90, %f14, %f89;
	mov.f32 	%f91, 0f3f800000;    	// 1
	min.ftz.f32 	%f92, %f88, %f91;
	mov.f32 	%f93, 0f3f800000;    	// 1
	min.ftz.f32 	%f94, %f90, %f93;
	mov.f32 	%f95, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f92, %f95;
	@!%p4 bra 	$Lt_61_14338;
	// xs <= 0.5:  b = ys * (2*xs)
	add.ftz.f32 	%f96, %f92, %f92;
	mul.ftz.f32 	%f97, %f94, %f96;
	bra.uni 	$Lt_61_14082;
$Lt_61_14338:
	// otherwise:  b = 1 - (2*(1-xs)) * (1-ys)
	mov.f32 	%f98, 0f3f800000;    	// 1
	sub.ftz.f32 	%f99, %f98, %f92;
	mov.f32 	%f100, 0f3f800000;   	// 1
	add.ftz.f32 	%f101, %f99, %f99;
	mov.f32 	%f102, 0f3f800000;   	// 1
	sub.ftz.f32 	%f103, %f102, %f94;
	mul.ftz.f32 	%f104, %f101, %f103;
	sub.ftz.f32 	%f97, %f100, %f104;
$Lt_61_14082:
	// out[+8] = y*(1-w) + w*( x*(1-a2) + a2*clamp(b,0,1) )
	.loc	22	479	0
	mov.f32 	%f105, 0f00000000;   	// 0
	max.ftz.f32 	%f106, %f97, %f105;
	mov.f32 	%f107, 0f3f800000;   	// 1
	min.ftz.f32 	%f108, %f106, %f107;
	mul.ftz.f32 	%f109, %f16, %f108;
	fma.rn.ftz.f32 	%f110, %f6, %f49, %f109;
	mul.ftz.f32 	%f111, %f51, %f110;
	fma.rn.ftz.f32 	%f27, %f14, %f54, %f111;
$Lt_61_12546:
	// Join point of the zero path and the blend path: store %f29,%f28,%f27,%f23
	// to the return struct at +0,+4,+8,+12.
	mov.f32 	%f112, %f29;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+0], %f112;
	mov.f32 	%f113, %f28;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+4], %f113;
	mov.f32 	%f114, %f27;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+8], %f114;
	mov.f32 	%f115, %f23;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi+12], %f115;
	ret;
$LDWend__Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi:
	} // _Z40BlendMode_PixelFn_IR_BlendMode_HardLight8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_VividLight(PixelRGB, PixelRGB, float, int)  [demangled from the symbol]
	//
	// Inputs:
	//   parmf1 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f2,%f4,%f6,%f8
	//   parmf2 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f10,%f12,%f14,%f16
	//   parmf3 : f32 scalar                                        -> %f18
	//   parmf4 : s32, declared but never loaded in this body
	// Output: 16-byte struct, written as four f32 at +0,+4,+8,+12 of the return param.
	//
	// Computation, with a1 = parmf1[+12], a2 = parmf2[+12], k = parmf3:
	//   t = k*a1;  A = (t + a2) - t*a2
	//   if (A - 8e-6 <= 0) return {0,0,0,0}
	//   w = t * rcp(A)
	//   for each channel c at +0,+4,+8 (x = parmf1[c], y = parmf2[c]):
	//     xs = clamp(x, 1e-6, 0.999999);  ys = clamp(y,0,1)
	//     b  = (xs <= 0.5) ? clamp(1 - (1-ys)/(2*xs), 0, 1)
	//                      : clamp(ys / (2*(1-xs)), 0, 1)
	//     m  = x*(1-a2) + b*a2
	//     out[c] = y*(1-w) + w*m
	//   out[+12] = A
	// The [1e-6, 0.999999] clamp on xs keeps both divisors (2*xs and 2*(1-xs)) non-zero.
	// All arithmetic uses .ftz; reciprocal and division are the .approx forms.
	// NOTE(review): +12 looks like an alpha channel and parmf3 like an opacity factor,
	// which would make parmf2 the backdrop pixel -- confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi)
	{
	.reg .f32 %f<129>;
	.reg .pred %p<6>;
	.loc	22	480	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi:
	// Unpack parmf1: x channels in %f2,%f4,%f6 and a1 in %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack parmf2: y channels in %f10,%f12,%f14 and a2 in %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// k = parmf3.
	ld.param.f32 	%f17, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = t = k*a1;  %f22 = A = (t + a2) - t*a2;  %f23 = value returned at +12.
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// %p1 = (A - 8e-6 <= 0). When true, fall through and return all zeros,
	// which also keeps rcp(A) below away from a zero/negative A.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_62_18178;
	// Zero result: %f29,%f28,%f27 are the +0,+4,+8 outputs, %f23 the +12 output.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_62_17922;
$Lt_62_18178:
	// ---- channel +0: %f35 = xs = clamp(x,1e-6,0.999999), %f37 = ys = clamp(y,0,1) ----
	.loc	22	431	0
	mov.f32 	%f30, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f10, %f32;
	mov.f32 	%f34, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f35, %f31, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f33, %f36;
	// Select the formula on xs <= 0.5.
	mov.f32 	%f38, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p2, %f35, %f38;
	@!%p2 bra 	$Lt_62_16386;
	// xs <= 0.5:  b = clamp(1 - (1-ys)/(2*xs), 0, 1)
	.loc	22	433	0
	mov.f32 	%f39, 0f3f800000;    	// 1
	mov.f32 	%f40, 0f3f800000;    	// 1
	sub.ftz.f32 	%f41, %f40, %f37;
	add.ftz.f32 	%f42, %f35, %f35;
	div.approx.ftz.f32 	%f43, %f41, %f42;
	sub.ftz.f32 	%f44, %f39, %f43;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f46, %f47;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__239_5;
$Lt_62_16386:
	// otherwise:  b = clamp(ys / (2*(1-xs)), 0, 1)
	.loc	22	437	0
	mov.f32 	%f49, 0f3f800000;    	// 1
	sub.ftz.f32 	%f50, %f49, %f35;
	add.ftz.f32 	%f51, %f50, %f50;
	div.approx.ftz.f32 	%f52, %f37, %f51;
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f52, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f54, %f55;
$LDWendi__Z5ClampIfET_S0_S0_S0__239_5:
	// Both branches join here with the already-clamped b in %f48.
	// Shared factors, reused by all three channels:
	//   %f57 = 1 - a2,  %f59 = w = t*rcp(A),  %f62 = 1 - w
	// (%f61 recomputes the same product as %f59.)
	.loc	22	480	0
	mov.f32 	%f56, 0f3f800000;    	// 1
	sub.ftz.f32 	%f57, %f56, %f16;
	rcp.approx.ftz.f32 	%f58, %f22;
	mul.ftz.f32 	%f59, %f19, %f58;
	mov.f32 	%f60, 0f3f800000;    	// 1
	mul.ftz.f32 	%f61, %f19, %f58;
	sub.ftz.f32 	%f62, %f60, %f61;
	// out[+0] = y*(1-w) + w*( x*(1-a2) + b*a2 )
	mul.ftz.f32 	%f63, %f48, %f16;
	fma.rn.ftz.f32 	%f64, %f2, %f57, %f63;
	mul.ftz.f32 	%f65, %f59, %f64;
	fma.rn.ftz.f32 	%f29, %f10, %f62, %f65;
	// ---- channel +4: %f71 = xs, %f73 = ys ----
	.loc	22	431	0
	mov.f32 	%f66, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f67, %f4, %f66;
	mov.f32 	%f68, 0f00000000;    	// 0
	max.ftz.f32 	%f69, %f12, %f68;
	mov.f32 	%f70, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f71, %f67, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f69, %f72;
	mov.f32 	%f74, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p3, %f71, %f74;
	@!%p3 bra 	$Lt_62_16642;
	// xs <= 0.5:  b = clamp(1 - (1-ys)/(2*xs), 0, 1)
	.loc	22	433	0
	mov.f32 	%f75, 0f3f800000;    	// 1
	mov.f32 	%f76, 0f3f800000;    	// 1
	sub.ftz.f32 	%f77, %f76, %f73;
	add.ftz.f32 	%f78, %f71, %f71;
	div.approx.ftz.f32 	%f79, %f77, %f78;
	sub.ftz.f32 	%f80, %f75, %f79;
	mov.f32 	%f81, 0f00000000;    	// 0
	max.ftz.f32 	%f82, %f80, %f81;
	mov.f32 	%f83, 0f3f800000;    	// 1
	min.ftz.f32 	%f84, %f82, %f83;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__239_3;
$Lt_62_16642:
	// otherwise:  b = clamp(ys / (2*(1-xs)), 0, 1)
	.loc	22	437	0
	mov.f32 	%f85, 0f3f800000;    	// 1
	sub.ftz.f32 	%f86, %f85, %f71;
	add.ftz.f32 	%f87, %f86, %f86;
	div.approx.ftz.f32 	%f88, %f73, %f87;
	mov.f32 	%f89, 0f00000000;    	// 0
	max.ftz.f32 	%f90, %f88, %f89;
	mov.f32 	%f91, 0f3f800000;    	// 1
	min.ftz.f32 	%f84, %f90, %f91;
$LDWendi__Z5ClampIfET_S0_S0_S0__239_3:
	// out[+4] = y*(1-w) + w*( x*(1-a2) + b*a2 ), b in %f84
	.loc	22	480	0
	mul.ftz.f32 	%f92, %f84, %f16;
	fma.rn.ftz.f32 	%f93, %f4, %f57, %f92;
	mul.ftz.f32 	%f94, %f59, %f93;
	fma.rn.ftz.f32 	%f28, %f12, %f62, %f94;
	// ---- channel +8: %f100 = xs, %f102 = ys ----
	.loc	22	431	0
	mov.f32 	%f95, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f96, %f6, %f95;
	mov.f32 	%f97, 0f00000000;    	// 0
	max.ftz.f32 	%f98, %f14, %f97;
	mov.f32 	%f99, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f100, %f96, %f99;
	mov.f32 	%f101, 0f3f800000;   	// 1
	min.ftz.f32 	%f102, %f98, %f101;
	mov.f32 	%f103, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p4, %f100, %f103;
	@!%p4 bra 	$Lt_62_16898;
	// xs <= 0.5:  b = clamp(1 - (1-ys)/(2*xs), 0, 1)
	.loc	22	433	0
	mov.f32 	%f104, 0f3f800000;   	// 1
	mov.f32 	%f105, 0f3f800000;   	// 1
	sub.ftz.f32 	%f106, %f105, %f102;
	add.ftz.f32 	%f107, %f100, %f100;
	div.approx.ftz.f32 	%f108, %f106, %f107;
	sub.ftz.f32 	%f109, %f104, %f108;
	mov.f32 	%f110, 0f00000000;   	// 0
	max.ftz.f32 	%f111, %f109, %f110;
	mov.f32 	%f112, 0f3f800000;   	// 1
	min.ftz.f32 	%f113, %f111, %f112;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__239_1;
$Lt_62_16898:
	// otherwise:  b = clamp(ys / (2*(1-xs)), 0, 1)
	.loc	22	437	0
	mov.f32 	%f114, 0f3f800000;   	// 1
	sub.ftz.f32 	%f115, %f114, %f100;
	add.ftz.f32 	%f116, %f115, %f115;
	div.approx.ftz.f32 	%f117, %f102, %f116;
	mov.f32 	%f118, 0f00000000;   	// 0
	max.ftz.f32 	%f119, %f117, %f118;
	mov.f32 	%f120, 0f3f800000;   	// 1
	min.ftz.f32 	%f113, %f119, %f120;
$LDWendi__Z5ClampIfET_S0_S0_S0__239_1:
	// out[+8] = y*(1-w) + w*( x*(1-a2) + b*a2 ), b in %f113
	.loc	22	480	0
	mul.ftz.f32 	%f121, %f113, %f16;
	fma.rn.ftz.f32 	%f122, %f6, %f57, %f121;
	mul.ftz.f32 	%f123, %f59, %f122;
	fma.rn.ftz.f32 	%f27, %f14, %f62, %f123;
$Lt_62_17922:
	// Join point of the zero path and the blend path: store %f29,%f28,%f27,%f23
	// to the return struct at +0,+4,+8,+12.
	mov.f32 	%f124, %f29;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+0], %f124;
	mov.f32 	%f125, %f28;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+4], %f125;
	mov.f32 	%f126, %f27;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+8], %f126;
	mov.f32 	%f127, %f23;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi+12], %f127;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_VividLight8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_LinearLight(PixelRGB, PixelRGB, float, int)  [demangled from the symbol]
	//
	// Inputs:
	//   parmf1 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f2,%f4,%f6,%f8
	//   parmf2 : 16-byte struct, read as four f32 at +0,+4,+8,+12 -> %f10,%f12,%f14,%f16
	//   parmf3 : f32 scalar                                        -> %f18
	//   parmf4 : s32, declared but never loaded in this body
	// Output: 16-byte struct, written as four f32 at +0,+4,+8,+12 of the return param.
	//
	// Computation, with a1 = parmf1[+12], a2 = parmf2[+12], k = parmf3:
	//   t = k*a1;  A = (t + a2) - t*a2
	//   if (A - 8e-6 <= 0) return {0,0,0,0}
	//   w = t * rcp(A)
	//   for each channel c at +0,+4,+8 (x = parmf1[c], y = parmf2[c]):
	//     xs = clamp(x,0,1);  ys = clamp(y,0,1)
	//     b  = (2*xs + ys) - 1
	//     m  = x*(1-a2) + a2*b
	//     out[c] = y*(1-w) + w*m
	//   out[+12] = A
	// The per-channel blend is branch-free; only the early-out test uses a predicate.
	// All arithmetic uses .ftz; the reciprocal is rcp.approx.
	// NOTE(review): b is used without a clamp, so it can range over [-1, 2] --
	// confirm that this is intended in the CUDA source.
	// NOTE(review): +12 looks like an alpha channel and parmf3 like an opacity factor,
	// which would make parmf2 the backdrop pixel -- confirm against the CUDA source.
	.visible .func (.param .align 16 .b8 __cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16]) _Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi)
	{
	.reg .f32 %f<87>;
	.reg .pred %p<3>;
	.loc	22	481	0
$LDWbegin__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi:
	// Unpack parmf1: x channels in %f2,%f4,%f6 and a1 in %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack parmf2: y channels in %f10,%f12,%f14 and a2 in %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// k = parmf3.
	ld.param.f32 	%f17, [__cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = t = k*a1;  %f22 = A = (t + a2) - t*a2;  %f23 = value returned at +12.
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// %p1 = (A - 8e-6 <= 0). When true, fall through and return all zeros,
	// which also keeps rcp(A) below away from a zero/negative A.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_63_7426;
	// Zero result: %f29,%f28,%f27 are the +0,+4,+8 outputs, %f23 the +12 output.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_63_7170;
$Lt_63_7426:
	// ---- channel +0, interleaved with the shared factors ----
	//   %f35 = xs = clamp(x,0,1)
	//   %f33 = 1 - a2,  %f37 = w = t*rcp(A),  %f40 = 1 - w   (reused by all channels)
	// (%f39 recomputes the same product as %f37.)
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f3f800000;    	// 1
	sub.ftz.f32 	%f33, %f32, %f16;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f31, %f34;
	rcp.approx.ftz.f32 	%f36, %f22;
	mul.ftz.f32 	%f37, %f19, %f36;
	mov.f32 	%f38, 0f3f800000;    	// 1
	mul.ftz.f32 	%f39, %f19, %f36;
	sub.ftz.f32 	%f40, %f38, %f39;
	// %f45 = ys = clamp(y,0,1);  %f48 = b = ((xs + xs) + ys) + (-1)
	add.ftz.f32 	%f41, %f35, %f35;
	mov.f32 	%f42, 0f00000000;    	// 0
	max.ftz.f32 	%f43, %f10, %f42;
	mov.f32 	%f44, 0f3f800000;    	// 1
	min.ftz.f32 	%f45, %f43, %f44;
	add.ftz.f32 	%f46, %f41, %f45;
	mov.f32 	%f47, 0fbf800000;    	// -1
	add.ftz.f32 	%f48, %f46, %f47;
	// out[+0] = y*(1-w) + w*( x*(1-a2) + a2*b )
	mul.ftz.f32 	%f49, %f16, %f48;
	fma.rn.ftz.f32 	%f50, %f2, %f33, %f49;
	mul.ftz.f32 	%f51, %f37, %f50;
	fma.rn.ftz.f32 	%f29, %f10, %f40, %f51;
	// ---- channel +4: %f55 = xs, %f60 = ys, %f63 = b ----
	mov.f32 	%f52, 0f00000000;    	// 0
	max.ftz.f32 	%f53, %f4, %f52;
	mov.f32 	%f54, 0f3f800000;    	// 1
	min.ftz.f32 	%f55, %f53, %f54;
	add.ftz.f32 	%f56, %f55, %f55;
	mov.f32 	%f57, 0f00000000;    	// 0
	max.ftz.f32 	%f58, %f12, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f58, %f59;
	add.ftz.f32 	%f61, %f56, %f60;
	mov.f32 	%f62, 0fbf800000;    	// -1
	add.ftz.f32 	%f63, %f61, %f62;
	// out[+4] = y*(1-w) + w*( x*(1-a2) + a2*b )
	mul.ftz.f32 	%f64, %f16, %f63;
	fma.rn.ftz.f32 	%f65, %f4, %f33, %f64;
	mul.ftz.f32 	%f66, %f37, %f65;
	fma.rn.ftz.f32 	%f28, %f12, %f40, %f66;
	// ---- channel +8: %f70 = xs, %f75 = ys, %f78 = b ----
	mov.f32 	%f67, 0f00000000;    	// 0
	max.ftz.f32 	%f68, %f6, %f67;
	mov.f32 	%f69, 0f3f800000;    	// 1
	min.ftz.f32 	%f70, %f68, %f69;
	add.ftz.f32 	%f71, %f70, %f70;
	mov.f32 	%f72, 0f00000000;    	// 0
	max.ftz.f32 	%f73, %f14, %f72;
	mov.f32 	%f74, 0f3f800000;    	// 1
	min.ftz.f32 	%f75, %f73, %f74;
	add.ftz.f32 	%f76, %f71, %f75;
	mov.f32 	%f77, 0fbf800000;    	// -1
	add.ftz.f32 	%f78, %f76, %f77;
	// out[+8] = y*(1-w) + w*( x*(1-a2) + a2*b )
	mul.ftz.f32 	%f79, %f16, %f78;
	fma.rn.ftz.f32 	%f80, %f6, %f33, %f79;
	mul.ftz.f32 	%f81, %f37, %f80;
	fma.rn.ftz.f32 	%f27, %f14, %f40, %f81;
$Lt_63_7170:
	// Join point of the zero path and the blend path: store %f29,%f28,%f27,%f23
	// to the return struct at +0,+4,+8,+12.
	mov.f32 	%f82, %f29;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+0], %f82;
	mov.f32 	%f83, %f28;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+4], %f83;
	mov.f32 	%f84, %f27;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+8], %f84;
	mov.f32 	%f85, %f23;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi+12], %f85;
	ret;
$LDWend__Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi:
	} // _Z42BlendMode_PixelFn_IR_BlendMode_LinearLight8PixelRGBS_fi

	// PinLight blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32), sat(x) = min(max(x,0),1).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     m      = clamp(sat(B[c]), 2*sat(A[c]) - 1, 2*sat(A[c]))
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + m*B[3])
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi)
	{
	.reg .f32 %f<87>;
	.reg .pred %p<9>;
	.loc	22	482	0
$LDWbegin__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_64_15106;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_64_14850;
$Lt_64_15106:
	.loc	22	450	0
	// Channel 0: %f35 = sat(A[0]), %f37 = sat(B[0]).
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f10, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f31, %f34;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f33, %f36;
	// %f38 = 2*sat(A[0]) (upper bound), %f40 = %f38 - 1 (lower bound).
	add.ftz.f32 	%f38, %f35, %f35;
	mov.f32 	%f39, 0fbf800000;    	// -1
	add.ftz.f32 	%f40, %f38, %f39;
	// Inlined clamp: %f41 = m = clamp(%f37, %f40, %f38).
	setp.lt.ftz.f32 	%p2, %f37, %f40;
	@!%p2 bra 	$Lt_64_11778;
	.loc	22	452	0
	mov.f32 	%f41, %f40;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_5;
$Lt_64_11778:
	.loc	22	454	0
	setp.gt.ftz.f32 	%p3, %f37, %f38;
	@!%p3 bra 	$Lt_64_12034;
	.loc	22	456	0
	mov.f32 	%f41, %f38;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_5;
$Lt_64_12034:
	.loc	22	460	0
	mov.f32 	%f41, %f37;
$LDWendi__Z5ClampIfET_S0_S0_S0__241_5:
	.loc	22	482	0
	// Shared factors reused by all three channels:
	//   %f43 = 1 - B[3],  %f45 = t = w * rcp.approx(out[3]),  %f48 = 1 - t.
	mov.f32 	%f42, 0f3f800000;    	// 1
	sub.ftz.f32 	%f43, %f42, %f16;
	rcp.approx.ftz.f32 	%f44, %f22;
	mul.ftz.f32 	%f45, %f19, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	// %f47 recomputes the same product as %f45.
	mul.ftz.f32 	%f47, %f19, %f44;
	sub.ftz.f32 	%f48, %f46, %f47;
	// out[0] (%f29) = B[0]*(1 - t) + t*(A[0]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f49, %f41, %f16;
	fma.rn.ftz.f32 	%f50, %f2, %f43, %f49;
	mul.ftz.f32 	%f51, %f45, %f50;
	fma.rn.ftz.f32 	%f29, %f10, %f48, %f51;
	.loc	22	450	0
	// Channel 1: %f57 = sat(A[1]), %f59 = sat(B[1]).
	mov.f32 	%f52, 0f00000000;    	// 0
	max.ftz.f32 	%f53, %f4, %f52;
	mov.f32 	%f54, 0f00000000;    	// 0
	max.ftz.f32 	%f55, %f12, %f54;
	mov.f32 	%f56, 0f3f800000;    	// 1
	min.ftz.f32 	%f57, %f53, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f55, %f58;
	// %f60 = 2*sat(A[1]), %f62 = %f60 - 1.
	add.ftz.f32 	%f60, %f57, %f57;
	mov.f32 	%f61, 0fbf800000;    	// -1
	add.ftz.f32 	%f62, %f60, %f61;
	// Inlined clamp: %f63 = m = clamp(%f59, %f62, %f60).
	setp.lt.ftz.f32 	%p4, %f59, %f62;
	@!%p4 bra 	$Lt_64_12290;
	.loc	22	452	0
	mov.f32 	%f63, %f62;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_3;
$Lt_64_12290:
	.loc	22	454	0
	setp.gt.ftz.f32 	%p5, %f59, %f60;
	@!%p5 bra 	$Lt_64_12546;
	.loc	22	456	0
	mov.f32 	%f63, %f60;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_3;
$Lt_64_12546:
	.loc	22	460	0
	mov.f32 	%f63, %f59;
$LDWendi__Z5ClampIfET_S0_S0_S0__241_3:
	.loc	22	482	0
	// out[1] (%f28) = B[1]*(1 - t) + t*(A[1]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f64, %f63, %f16;
	fma.rn.ftz.f32 	%f65, %f4, %f43, %f64;
	mul.ftz.f32 	%f66, %f45, %f65;
	fma.rn.ftz.f32 	%f28, %f12, %f48, %f66;
	.loc	22	450	0
	// Channel 2: %f72 = sat(A[2]), %f74 = sat(B[2]).
	mov.f32 	%f67, 0f00000000;    	// 0
	max.ftz.f32 	%f68, %f6, %f67;
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f14, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f68, %f71;
	mov.f32 	%f73, 0f3f800000;    	// 1
	min.ftz.f32 	%f74, %f70, %f73;
	// %f75 = 2*sat(A[2]), %f77 = %f75 - 1.
	add.ftz.f32 	%f75, %f72, %f72;
	mov.f32 	%f76, 0fbf800000;    	// -1
	add.ftz.f32 	%f77, %f75, %f76;
	// Inlined clamp: %f78 = m = clamp(%f74, %f77, %f75).
	setp.lt.ftz.f32 	%p6, %f74, %f77;
	@!%p6 bra 	$Lt_64_12802;
	.loc	22	452	0
	mov.f32 	%f78, %f77;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_1;
$Lt_64_12802:
	.loc	22	454	0
	setp.gt.ftz.f32 	%p7, %f74, %f75;
	@!%p7 bra 	$Lt_64_13058;
	.loc	22	456	0
	mov.f32 	%f78, %f75;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__241_1;
$Lt_64_13058:
	.loc	22	460	0
	mov.f32 	%f78, %f74;
$LDWendi__Z5ClampIfET_S0_S0_S0__241_1:
	.loc	22	482	0
	// out[2] (%f27) = B[2]*(1 - t) + t*(A[2]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f79, %f78, %f16;
	fma.rn.ftz.f32 	%f80, %f6, %f43, %f79;
	mul.ftz.f32 	%f81, %f45, %f80;
	fma.rn.ftz.f32 	%f27, %f14, %f48, %f81;
$Lt_64_14850:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f82, %f29;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+0], %f82;
	mov.f32 	%f83, %f28;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+4], %f83;
	mov.f32 	%f84, %f27;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+8], %f84;
	mov.f32 	%f85, %f23;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi+12], %f85;
	ret;
$LDWend__Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi:
	} // _Z39BlendMode_PixelFn_IR_BlendMode_PinLight8PixelRGBS_fi

	// HardMix blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     m      = (A[c] < 1 - B[c]) ? 0 : 1
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + m*B[3])
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16]) _Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi, .param .s32 __cudaparmf4__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi)
	{
	.reg .f32 %f<66>;
	.reg .pred %p<6>;
	.loc	22	483	0
$LDWbegin__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_65_3586;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_65_3330;
$Lt_65_3586:
	// Shared factors reused by all three channels:
	//   %f31 = 1 - B[3],  %f33 = t = w * rcp.approx(out[3]),  %f36 = 1 - t.
	mov.f32 	%f30, 0f3f800000;    	// 1
	sub.ftz.f32 	%f31, %f30, %f16;
	rcp.approx.ftz.f32 	%f32, %f22;
	mul.ftz.f32 	%f33, %f19, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	// %f35 recomputes the same product as %f33.
	mul.ftz.f32 	%f35, %f19, %f32;
	sub.ftz.f32 	%f36, %f34, %f35;
	// Channel 0: %f41 = m = (A[0] < 1 - B[0]) ? 0 : 1.
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f38, 0f3f800000;    	// 1
	mov.f32 	%f39, 0f3f800000;    	// 1
	sub.ftz.f32 	%f40, %f39, %f10;
	setp.lt.ftz.f32 	%p2, %f2, %f40;
	selp.f32 	%f41, %f37, %f38, %p2;
	// out[0] (%f29) = B[0]*(1 - t) + t*(A[0]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f42, %f41, %f16;
	fma.rn.ftz.f32 	%f43, %f2, %f31, %f42;
	mul.ftz.f32 	%f44, %f33, %f43;
	fma.rn.ftz.f32 	%f29, %f10, %f36, %f44;
	// Channel 1: %f49 = m = (A[1] < 1 - B[1]) ? 0 : 1.
	mov.f32 	%f45, 0f00000000;    	// 0
	mov.f32 	%f46, 0f3f800000;    	// 1
	mov.f32 	%f47, 0f3f800000;    	// 1
	sub.ftz.f32 	%f48, %f47, %f12;
	setp.lt.ftz.f32 	%p3, %f4, %f48;
	selp.f32 	%f49, %f45, %f46, %p3;
	// out[1] (%f28) = B[1]*(1 - t) + t*(A[1]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f50, %f49, %f16;
	fma.rn.ftz.f32 	%f51, %f4, %f31, %f50;
	mul.ftz.f32 	%f52, %f33, %f51;
	fma.rn.ftz.f32 	%f28, %f12, %f36, %f52;
	// Channel 2: %f57 = m = (A[2] < 1 - B[2]) ? 0 : 1.
	mov.f32 	%f53, 0f00000000;    	// 0
	mov.f32 	%f54, 0f3f800000;    	// 1
	mov.f32 	%f55, 0f3f800000;    	// 1
	sub.ftz.f32 	%f56, %f55, %f14;
	setp.lt.ftz.f32 	%p4, %f6, %f56;
	selp.f32 	%f57, %f53, %f54, %p4;
	// out[2] (%f27) = B[2]*(1 - t) + t*(A[2]*(1 - B[3]) + m*B[3]).
	mul.ftz.f32 	%f58, %f57, %f16;
	fma.rn.ftz.f32 	%f59, %f6, %f31, %f58;
	mul.ftz.f32 	%f60, %f33, %f59;
	fma.rn.ftz.f32 	%f27, %f14, %f36, %f60;
$Lt_65_3330:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f61, %f29;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+0], %f61;
	mov.f32 	%f62, %f28;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+4], %f62;
	mov.f32 	%f63, %f27;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+8], %f63;
	mov.f32 	%f64, %f23;
	st.param.f32 	[__cudaretf__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi+12], %f64;
	ret;
$LDWend__Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi:
	} // _Z38BlendMode_PixelFn_IR_BlendMode_HardMix8PixelRGBS_fi

	// Difference blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     m      = |A[c] - B[c]|          (inputs are not clamped in this mode)
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + B[3]*m)
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi)
	{
	.reg .f32 %f<57>;
	.reg .pred %p<3>;
	.loc	22	484	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_66_1282;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_66_1026;
$Lt_66_1282:
	// Shared factors reused by all three channels:
	//   %f31 = 1 - B[3],  %f33 = t = w * rcp.approx(out[3]),  %f36 = 1 - t.
	mov.f32 	%f30, 0f3f800000;    	// 1
	sub.ftz.f32 	%f31, %f30, %f16;
	rcp.approx.ftz.f32 	%f32, %f22;
	mul.ftz.f32 	%f33, %f19, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	// %f35 recomputes the same product as %f33.
	mul.ftz.f32 	%f35, %f19, %f32;
	sub.ftz.f32 	%f36, %f34, %f35;
	// Channel 0: %f38 = m = |A[0] - B[0]|, then out[0] (%f29) via the shared composite.
	sub.ftz.f32 	%f37, %f2, %f10;
	abs.ftz.f32 	%f38, %f37;
	mul.ftz.f32 	%f39, %f16, %f38;
	fma.rn.ftz.f32 	%f40, %f2, %f31, %f39;
	mul.ftz.f32 	%f41, %f33, %f40;
	fma.rn.ftz.f32 	%f29, %f10, %f36, %f41;
	// Channel 1: %f43 = m = |A[1] - B[1]|, then out[1] (%f28).
	sub.ftz.f32 	%f42, %f4, %f12;
	abs.ftz.f32 	%f43, %f42;
	mul.ftz.f32 	%f44, %f16, %f43;
	fma.rn.ftz.f32 	%f45, %f4, %f31, %f44;
	mul.ftz.f32 	%f46, %f33, %f45;
	fma.rn.ftz.f32 	%f28, %f12, %f36, %f46;
	// Channel 2: %f48 = m = |A[2] - B[2]|, then out[2] (%f27).
	sub.ftz.f32 	%f47, %f6, %f14;
	abs.ftz.f32 	%f48, %f47;
	mul.ftz.f32 	%f49, %f16, %f48;
	fma.rn.ftz.f32 	%f50, %f6, %f31, %f49;
	mul.ftz.f32 	%f51, %f33, %f50;
	fma.rn.ftz.f32 	%f27, %f14, %f36, %f51;
$Lt_66_1026:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f52, %f29;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+0], %f52;
	mov.f32 	%f53, %f28;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+4], %f53;
	mov.f32 	%f54, %f27;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+8], %f54;
	mov.f32 	%f55, %f23;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi+12], %f55;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_Difference8PixelRGBS_fi

	// Exclusion blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32), sat(x) = min(max(x,0),1).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     a = sat(A[c]); b = sat(B[c])
	//     m      = sat(a + b - b*(2*a))
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + B[3]*m)
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16]) _Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi, .param .s32 __cudaparmf4__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi)
	{
	.reg .f32 %f<99>;
	.reg .pred %p<3>;
	.loc	22	485	0
$LDWbegin__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_67_10498;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_67_10242;
$Lt_67_10498:
	// Channel 0 clamps interleaved with the shared factors:
	//   %f37 = a = sat(A[0]), %f39 = b = sat(B[0]), %f35 = 1 - B[3].
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f2, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f10, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	sub.ftz.f32 	%f35, %f34, %f16;
	mov.f32 	%f36, 0f3f800000;    	// 1
	min.ftz.f32 	%f37, %f31, %f36;
	mov.f32 	%f38, 0f3f800000;    	// 1
	min.ftz.f32 	%f39, %f33, %f38;
	// %f41 = t = w * rcp.approx(out[3]);  %f44 = 1 - t (%f43 recomputes the same product as %f41).
	rcp.approx.ftz.f32 	%f40, %f22;
	mul.ftz.f32 	%f41, %f19, %f40;
	mov.f32 	%f42, 0f3f800000;    	// 1
	mul.ftz.f32 	%f43, %f19, %f40;
	sub.ftz.f32 	%f44, %f42, %f43;
	// %f48 = a + b - b*(2*a);  %f52 = m = sat(%f48).
	add.ftz.f32 	%f45, %f37, %f39;
	add.ftz.f32 	%f46, %f37, %f37;
	mul.ftz.f32 	%f47, %f39, %f46;
	sub.ftz.f32 	%f48, %f45, %f47;
	mov.f32 	%f49, 0f00000000;    	// 0
	max.ftz.f32 	%f50, %f48, %f49;
	mov.f32 	%f51, 0f3f800000;    	// 1
	min.ftz.f32 	%f52, %f50, %f51;
	// out[0] (%f29) = B[0]*(1 - t) + t*(A[0]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f53, %f16, %f52;
	fma.rn.ftz.f32 	%f54, %f2, %f35, %f53;
	mul.ftz.f32 	%f55, %f41, %f54;
	fma.rn.ftz.f32 	%f29, %f10, %f44, %f55;
	// Channel 1: %f61 = a = sat(A[1]), %f63 = b = sat(B[1]).
	mov.f32 	%f56, 0f00000000;    	// 0
	max.ftz.f32 	%f57, %f4, %f56;
	mov.f32 	%f58, 0f00000000;    	// 0
	max.ftz.f32 	%f59, %f12, %f58;
	mov.f32 	%f60, 0f3f800000;    	// 1
	min.ftz.f32 	%f61, %f57, %f60;
	mov.f32 	%f62, 0f3f800000;    	// 1
	min.ftz.f32 	%f63, %f59, %f62;
	// %f67 = a + b - b*(2*a);  %f71 = m = sat(%f67).
	add.ftz.f32 	%f64, %f61, %f63;
	add.ftz.f32 	%f65, %f61, %f61;
	mul.ftz.f32 	%f66, %f63, %f65;
	sub.ftz.f32 	%f67, %f64, %f66;
	mov.f32 	%f68, 0f00000000;    	// 0
	max.ftz.f32 	%f69, %f67, %f68;
	mov.f32 	%f70, 0f3f800000;    	// 1
	min.ftz.f32 	%f71, %f69, %f70;
	// out[1] (%f28) = B[1]*(1 - t) + t*(A[1]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f72, %f16, %f71;
	fma.rn.ftz.f32 	%f73, %f4, %f35, %f72;
	mul.ftz.f32 	%f74, %f41, %f73;
	fma.rn.ftz.f32 	%f28, %f12, %f44, %f74;
	// Channel 2: %f80 = a = sat(A[2]), %f82 = b = sat(B[2]).
	mov.f32 	%f75, 0f00000000;    	// 0
	max.ftz.f32 	%f76, %f6, %f75;
	mov.f32 	%f77, 0f00000000;    	// 0
	max.ftz.f32 	%f78, %f14, %f77;
	mov.f32 	%f79, 0f3f800000;    	// 1
	min.ftz.f32 	%f80, %f76, %f79;
	mov.f32 	%f81, 0f3f800000;    	// 1
	min.ftz.f32 	%f82, %f78, %f81;
	// %f86 = a + b - b*(2*a);  %f90 = m = sat(%f86).
	add.ftz.f32 	%f83, %f80, %f82;
	add.ftz.f32 	%f84, %f80, %f80;
	mul.ftz.f32 	%f85, %f82, %f84;
	sub.ftz.f32 	%f86, %f83, %f85;
	mov.f32 	%f87, 0f00000000;    	// 0
	max.ftz.f32 	%f88, %f86, %f87;
	mov.f32 	%f89, 0f3f800000;    	// 1
	min.ftz.f32 	%f90, %f88, %f89;
	// out[2] (%f27) = B[2]*(1 - t) + t*(A[2]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f91, %f16, %f90;
	fma.rn.ftz.f32 	%f92, %f6, %f35, %f91;
	mul.ftz.f32 	%f93, %f41, %f92;
	fma.rn.ftz.f32 	%f27, %f14, %f44, %f93;
$Lt_67_10242:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f94, %f29;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+0], %f94;
	mov.f32 	%f95, %f28;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+4], %f95;
	mov.f32 	%f96, %f27;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+8], %f96;
	mov.f32 	%f97, %f23;
	st.param.f32 	[__cudaretf__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi+12], %f97;
	ret;
$LDWend__Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi:
	} // _Z40BlendMode_PixelFn_IR_BlendMode_Exclusion8PixelRGBS_fi

	// Subtract blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32), sat(x) = min(max(x,0),1).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     m      = sat(sat(B[c]) - sat(A[c]))
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + B[3]*m)
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi)
	{
	.reg .f32 %f<90>;
	.reg .pred %p<3>;
	.loc	22	486	0
$LDWbegin__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_68_10498;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_68_10242;
$Lt_68_10498:
	// Shared factors reused by all three channels:
	//   %f31 = 1 - B[3],  %f33 = t = w * rcp.approx(out[3]),  %f36 = 1 - t.
	mov.f32 	%f30, 0f3f800000;    	// 1
	sub.ftz.f32 	%f31, %f30, %f16;
	rcp.approx.ftz.f32 	%f32, %f22;
	mul.ftz.f32 	%f33, %f19, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	// %f35 recomputes the same product as %f33.
	mul.ftz.f32 	%f35, %f19, %f32;
	sub.ftz.f32 	%f36, %f34, %f35;
	// Channel 0: %f40 = sat(B[0]), %f44 = sat(A[0]).
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f10, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mov.f32 	%f41, 0f00000000;    	// 0
	max.ftz.f32 	%f42, %f2, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	min.ftz.f32 	%f44, %f42, %f43;
	// %f49 = m = sat(sat(B[0]) - sat(A[0])).
	sub.ftz.f32 	%f45, %f40, %f44;
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f45, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	// out[0] (%f29) = B[0]*(1 - t) + t*(A[0]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f50, %f16, %f49;
	fma.rn.ftz.f32 	%f51, %f2, %f31, %f50;
	mul.ftz.f32 	%f52, %f33, %f51;
	fma.rn.ftz.f32 	%f29, %f10, %f36, %f52;
	// Channel 1: %f56 = sat(B[1]), %f60 = sat(A[1]).
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f12, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	min.ftz.f32 	%f56, %f54, %f55;
	mov.f32 	%f57, 0f00000000;    	// 0
	max.ftz.f32 	%f58, %f4, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f58, %f59;
	// %f65 = m = sat(sat(B[1]) - sat(A[1])).
	sub.ftz.f32 	%f61, %f56, %f60;
	mov.f32 	%f62, 0f00000000;    	// 0
	max.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, 0f3f800000;    	// 1
	min.ftz.f32 	%f65, %f63, %f64;
	// out[1] (%f28) = B[1]*(1 - t) + t*(A[1]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f66, %f16, %f65;
	fma.rn.ftz.f32 	%f67, %f4, %f31, %f66;
	mul.ftz.f32 	%f68, %f33, %f67;
	fma.rn.ftz.f32 	%f28, %f12, %f36, %f68;
	// Channel 2: %f72 = sat(B[2]), %f76 = sat(A[2]).
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f14, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f70, %f71;
	mov.f32 	%f73, 0f00000000;    	// 0
	max.ftz.f32 	%f74, %f6, %f73;
	mov.f32 	%f75, 0f3f800000;    	// 1
	min.ftz.f32 	%f76, %f74, %f75;
	// %f81 = m = sat(sat(B[2]) - sat(A[2])).
	sub.ftz.f32 	%f77, %f72, %f76;
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f77, %f78;
	mov.f32 	%f80, 0f3f800000;    	// 1
	min.ftz.f32 	%f81, %f79, %f80;
	// out[2] (%f27) = B[2]*(1 - t) + t*(A[2]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f82, %f16, %f81;
	fma.rn.ftz.f32 	%f83, %f6, %f31, %f82;
	mul.ftz.f32 	%f84, %f33, %f83;
	fma.rn.ftz.f32 	%f27, %f14, %f36, %f84;
$Lt_68_10242:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f85, %f29;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+0], %f85;
	mov.f32 	%f86, %f28;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+4], %f86;
	mov.f32 	%f87, %f27;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+8], %f87;
	mov.f32 	%f88, %f23;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi+12], %f88;
	ret;
$LDWend__Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi:
	} // _Z39BlendMode_PixelFn_IR_BlendMode_Subtract8PixelRGBS_fi

	// Divide blend of two 16-byte pixel params (four f32 each at +0/+4/+8/+12).
	// Notation: A = parm1, B = parm2, k = parm3 (.f32), sat(x) = min(max(x,0),1).
	//   w      = k * A[3]
	//   out[3] = w + B[3] - w*B[3]
	//   if (out[3] - 8e-6 <= 0) return {0,0,0,0}
	//   t      = w * rcp.approx(out[3])
	//   for c in 0..2:
	//     d      = min(max(A[c], 1e-7), 1)      (divisor floored at 1e-7 rather than 0)
	//     m      = sat(div.approx(sat(B[c]), d))
	//     out[c] = B[c]*(1 - t) + t*(A[c]*(1 - B[3]) + B[3]*m)
	// parm4 (.s32) is declared but never loaded in this body.
	// NOTE(review): component [3] is presumably alpha/coverage -- confirm against the PixelRGB definition.
	.visible .func (.param .align 16 .b8 __cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16]) _Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi, .param .s32 __cudaparmf4__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi)
	{
	.reg .f32 %f<90>;
	.reg .pred %p<3>;
	.loc	22	487	0
$LDWbegin__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi:
	// Load A[0..3] into %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Load B[0..3] into %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Load k into %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	// %f19 = w = k*A[3];  %f22 = %f23 = out[3] = w + B[3] - w*B[3].
	mul.ftz.f32 	%f19, %f18, %f8;
	add.ftz.f32 	%f20, %f19, %f16;
	mul.ftz.f32 	%f21, %f19, %f16;
	sub.ftz.f32 	%f22, %f20, %f21;
	mov.f32 	%f23, %f22;
	// Early-out test: out[3] - 8e-6 <= 0 means the reciprocal below is skipped.
	mov.f32 	%f24, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f25, %f22, %f24;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f25, %f26;
	@!%p1 bra 	$Lt_69_10498;
	// Degenerate case: return all four components as zero.
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_69_10242;
$Lt_69_10498:
	// Shared factors reused by all three channels:
	//   %f31 = 1 - B[3],  %f33 = t = w * rcp.approx(out[3]),  %f36 = 1 - t.
	mov.f32 	%f30, 0f3f800000;    	// 1
	sub.ftz.f32 	%f31, %f30, %f16;
	rcp.approx.ftz.f32 	%f32, %f22;
	mul.ftz.f32 	%f33, %f19, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	// %f35 recomputes the same product as %f33.
	mul.ftz.f32 	%f35, %f19, %f32;
	sub.ftz.f32 	%f36, %f34, %f35;
	// Channel 0: %f40 = sat(B[0]) (dividend), %f44 = min(max(A[0],1e-7),1) (divisor).
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f10, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mov.f32 	%f41, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f42, %f2, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	min.ftz.f32 	%f44, %f42, %f43;
	// %f49 = m = sat(%f40 / %f44), using the approximate divide.
	div.approx.ftz.f32 	%f45, %f40, %f44;
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f45, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	// out[0] (%f29) = B[0]*(1 - t) + t*(A[0]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f50, %f16, %f49;
	fma.rn.ftz.f32 	%f51, %f2, %f31, %f50;
	mul.ftz.f32 	%f52, %f33, %f51;
	fma.rn.ftz.f32 	%f29, %f10, %f36, %f52;
	// Channel 1: %f56 = sat(B[1]) (dividend), %f60 = min(max(A[1],1e-7),1) (divisor).
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f12, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	min.ftz.f32 	%f56, %f54, %f55;
	mov.f32 	%f57, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f58, %f4, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f58, %f59;
	// %f65 = m = sat(%f56 / %f60), using the approximate divide.
	div.approx.ftz.f32 	%f61, %f56, %f60;
	mov.f32 	%f62, 0f00000000;    	// 0
	max.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, 0f3f800000;    	// 1
	min.ftz.f32 	%f65, %f63, %f64;
	// out[1] (%f28) = B[1]*(1 - t) + t*(A[1]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f66, %f16, %f65;
	fma.rn.ftz.f32 	%f67, %f4, %f31, %f66;
	mul.ftz.f32 	%f68, %f33, %f67;
	fma.rn.ftz.f32 	%f28, %f12, %f36, %f68;
	// Channel 2: %f72 = sat(B[2]) (dividend), %f76 = min(max(A[2],1e-7),1) (divisor).
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f14, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f70, %f71;
	mov.f32 	%f73, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f74, %f6, %f73;
	mov.f32 	%f75, 0f3f800000;    	// 1
	min.ftz.f32 	%f76, %f74, %f75;
	// %f81 = m = sat(%f72 / %f76), using the approximate divide.
	div.approx.ftz.f32 	%f77, %f72, %f76;
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f77, %f78;
	mov.f32 	%f80, 0f3f800000;    	// 1
	min.ftz.f32 	%f81, %f79, %f80;
	// out[2] (%f27) = B[2]*(1 - t) + t*(A[2]*(1 - B[3]) + B[3]*m).
	mul.ftz.f32 	%f82, %f16, %f81;
	fma.rn.ftz.f32 	%f83, %f6, %f31, %f82;
	mul.ftz.f32 	%f84, %f33, %f83;
	fma.rn.ftz.f32 	%f27, %f14, %f36, %f84;
$Lt_69_10242:
	// Join point for both paths: store out[0..3] to the 16-byte return param.
	mov.f32 	%f85, %f29;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+0], %f85;
	mov.f32 	%f86, %f28;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+4], %f86;
	mov.f32 	%f87, %f27;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+8], %f87;
	mov.f32 	%f88, %f23;
	st.param.f32 	[__cudaretf__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi+12], %f88;
	ret;
$LDWend__Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi:
	} // _Z37BlendMode_PixelFn_IR_BlendMode_Divide8PixelRGBS_fi

	.visible .func (.param .align 16 .b8 __cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16]) _Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi, .param .s32 __cudaparmf4__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi)
	{
	.reg .f32 %f<132>;
	.reg .pred %p<20>;
	.loc	22	494	0
$LDWbegin__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi:
	ld.param.f32 	%f1, [__cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	ld.param.f32 	%f17, [__cudaparmf3__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	154	0
	setp.lt.ftz.f32 	%p1, %f2, %f4;
	max.ftz.f32 	%f19, %f2, %f4;
	selp.f32 	%f20, %f2, %f4, %p1;
	max.ftz.f32 	%f21, %f19, %f6;
	setp.lt.ftz.f32 	%p2, %f20, %f6;
	selp.f32 	%f22, %f20, %f6, %p2;
	setp.eq.ftz.f32 	%p3, %f22, %f6;
	@!%p3 bra 	$Lt_70_18946;
	setp.eq.ftz.f32 	%p4, %f21, %f4;
	@!%p4 bra 	$Lt_70_19458;
	setp.gt.ftz.f32 	%p5, %f4, %f6;
	@!%p5 bra 	$Lt_70_19970;
	.loc	22	161	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f29, %f2, %f6;
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f4, %f6;
	div.approx.ftz.f32 	%f32, %f30, %f31;
	.loc	22	162	0
	mov.f32 	%f33, %f28;
	bra.uni 	$Lt_70_20226;
$Lt_70_19970:
	.loc	22	166	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_70_20226;
$Lt_70_19458:
	setp.gt.ftz.f32 	%p8, %f2, %f6;
	@!%p8 bra 	$Lt_70_20482;
	.loc	22	173	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f34, %f4, %f6;
	mul.ftz.f32 	%f35, %f28, %f34;
	sub.ftz.f32 	%f36, %f2, %f6;
	div.approx.ftz.f32 	%f33, %f35, %f36;
	.loc	22	174	0
	mov.f32 	%f32, %f28;
	bra.uni 	$Lt_70_20226;
$Lt_70_20482:
	.loc	22	178	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_70_20226:
$Lt_70_19202:
	mov.f32 	%f37, 0f00000000;    	// 0
	bra.uni 	$Lt_70_22786;
$Lt_70_18946:
	setp.eq.ftz.f32 	%p9, %f22, %f4;
	setp.eq.ftz.f32 	%p10, %f21, %f6;
	@!%p10 bra 	$Lt_70_20994;
	@!%p9 bra 	$Lt_70_21506;
	setp.lt.ftz.f32 	%p11, %f4, %f6;
	@!%p11 bra 	$Lt_70_22018;
	.loc	22	191	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f38, %f2, %f4;
	mul.ftz.f32 	%f39, %f28, %f38;
	sub.ftz.f32 	%f40, %f6, %f4;
	div.approx.ftz.f32 	%f32, %f39, %f40;
	.loc	22	192	0
	mov.f32 	%f37, %f28;
	bra.uni 	$Lt_70_21762;
$Lt_70_22018:
	.loc	22	196	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f37, 0f00000000;    	// 0
$Lt_70_21762:
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_70_22786;
$Lt_70_21506:
	setp.lt.ftz.f32 	%p12, %f2, %f6;
	@!%p12 bra 	$Lt_70_22530;
	.loc	22	204	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f41, %f4, %f2;
	mul.ftz.f32 	%f42, %f28, %f41;
	sub.ftz.f32 	%f43, %f6, %f2;
	div.approx.ftz.f32 	%f33, %f42, %f43;
	.loc	22	205	0
	mov.f32 	%f37, %f28;
	bra.uni 	$Lt_70_22274;
$Lt_70_22530:
	.loc	22	209	0
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_70_22274:
	.loc	22	211	0
	mov.f32 	%f32, 0f00000000;    	// 0
	bra.uni 	$Lt_70_22786;
$Lt_70_20994:
	@!%p9 bra 	$Lt_70_23042;
	setp.gt.ftz.f32 	%p13, %f2, %f4;
	@!%p13 bra 	$Lt_70_23554;
	.loc	22	220	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f44, %f6, %f4;
	mul.ftz.f32 	%f45, %f28, %f44;
	sub.ftz.f32 	%f46, %f2, %f4;
	div.approx.ftz.f32 	%f37, %f45, %f46;
	.loc	22	221	0
	mov.f32 	%f32, %f28;
	bra.uni 	$Lt_70_23298;
$Lt_70_23554:
	.loc	22	225	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f37, 0f00000000;    	// 0
$Lt_70_23298:
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_70_22786;
$Lt_70_23042:
	@!%p1 bra 	$Lt_70_24066;
	.loc	22	233	0
	max.ftz.f32 	%f23, %f10, %f12;
	setp.lt.ftz.f32 	%p6, %f10, %f12;
	max.ftz.f32 	%f24, %f23, %f14;
	selp.f32 	%f25, %f10, %f12, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f14;
	selp.f32 	%f26, %f25, %f14, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	sub.ftz.f32 	%f47, %f6, %f2;
	mul.ftz.f32 	%f48, %f28, %f47;
	sub.ftz.f32 	%f49, %f4, %f2;
	div.approx.ftz.f32 	%f37, %f48, %f49;
	.loc	22	234	0
	mov.f32 	%f33, %f28;
	bra.uni 	$Lt_70_23810;
$Lt_70_24066:
	.loc	22	238	0
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_70_23810:
	.loc	22	240	0
	mov.f32 	%f32, 0f00000000;    	// 0
$Lt_70_22786:
$Lt_70_20738:
$Lt_70_18690:
	.loc	22	113	0
	ld.const.f32 	%f50, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f51, %f12, %f50;
	mul.ftz.f32 	%f52, %f50, %f33;
	ld.const.f32 	%f53, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f54, %f53, %f14, %f51;
	fma.rn.ftz.f32 	%f55, %f53, %f37, %f52;
	ld.const.f32 	%f56, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f57, %f56, %f10, %f54;
	fma.rn.ftz.f32 	%f58, %f56, %f32, %f55;
	cvt.ftz.sat.f32.f32 	%f59, %f57;
	cvt.ftz.sat.f32.f32 	%f60, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	add.ftz.f32 	%f62, %f61, %f32;
	mov.f32 	%f63, %f62;
	add.ftz.f32 	%f64, %f61, %f33;
	mov.f32 	%f65, %f64;
	add.ftz.f32 	%f66, %f61, %f37;
	mov.f32 	%f67, %f66;
	.loc	22	50	0
	mul.ftz.f32 	%f68, %f64, %f50;
	fma.rn.ftz.f32 	%f69, %f53, %f66, %f68;
	fma.rn.ftz.f32 	%f70, %f56, %f62, %f69;
	cvt.ftz.sat.f32.f32 	%f71, %f70;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p14, %f64, %f62;
	selp.f32 	%f72, %f62, %f64, %p14;
	setp.lt.ftz.f32 	%p15, %f72, %f66;
	selp.f32 	%f73, %f72, %f66, %p15;
	mov.f32 	%f74, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p16, %f73, %f74;
	@!%p16 bra 	$Lt_70_24322;
	.loc	22	119	0
	sub.ftz.f32 	%f75, %f71, %f73;
	sub.ftz.f32 	%f76, %f66, %f71;
	mul.ftz.f32 	%f77, %f71, %f76;
	div.approx.ftz.f32 	%f78, %f77, %f75;
	add.ftz.f32 	%f67, %f71, %f78;
	.loc	22	120	0
	sub.ftz.f32 	%f79, %f64, %f71;
	mul.ftz.f32 	%f80, %f71, %f79;
	div.approx.ftz.f32 	%f81, %f80, %f75;
	add.ftz.f32 	%f65, %f71, %f81;
	.loc	22	121	0
	sub.ftz.f32 	%f82, %f62, %f71;
	mul.ftz.f32 	%f83, %f71, %f82;
	div.approx.ftz.f32 	%f84, %f83, %f75;
	add.ftz.f32 	%f63, %f71, %f84;
$Lt_70_24322:
	max.ftz.f32 	%f85, %f64, %f62;
	max.ftz.f32 	%f86, %f85, %f66;
	mov.f32 	%f87, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p17, %f86, %f87;
	@!%p17 bra 	$Lt_70_24834;
	.loc	27	529	0
	mov.f32 	%f88, 0f3f800000;    	// 1
	sub.ftz.f32 	%f89, %f88, %f71;
	sub.ftz.f32 	%f90, %f86, %f71;
	sub.ftz.f32 	%f91, %f67, %f71;
	mul.ftz.f32 	%f92, %f89, %f91;
	div.approx.ftz.f32 	%f93, %f92, %f90;
	.loc	22	125	0
	add.ftz.f32 	%f67, %f93, %f71;
	.loc	27	529	0
	sub.ftz.f32 	%f94, %f65, %f71;
	mul.ftz.f32 	%f95, %f89, %f94;
	div.approx.ftz.f32 	%f96, %f95, %f90;
	.loc	22	126	0
	add.ftz.f32 	%f65, %f96, %f71;
	.loc	27	529	0
	sub.ftz.f32 	%f97, %f63, %f71;
	mul.ftz.f32 	%f98, %f89, %f97;
	div.approx.ftz.f32 	%f99, %f98, %f90;
	.loc	22	127	0
	add.ftz.f32 	%f63, %f99, %f71;
$Lt_70_24834:
	.loc	22	468	0
	mul.ftz.f32 	%f100, %f18, %f8;
	add.ftz.f32 	%f101, %f100, %f16;
	mul.ftz.f32 	%f102, %f100, %f16;
	sub.ftz.f32 	%f103, %f101, %f102;
	mov.f32 	%f104, %f103;
	mov.f32 	%f105, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f106, %f103, %f105;
	mov.f32 	%f107, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p18, %f106, %f107;
	@!%p18 bra 	$Lt_70_25602;
	mov.f32 	%f108, 0f00000000;   	// 0
	mov.f32 	%f109, 0f00000000;   	// 0
	mov.f32 	%f110, 0f00000000;   	// 0
	mov.f32 	%f104, 0f00000000;   	// 0
	bra.uni 	$Lt_70_25346;
$Lt_70_25602:
	mov.f32 	%f111, 0f3f800000;   	// 1
	sub.ftz.f32 	%f112, %f111, %f16;
	rcp.approx.ftz.f32 	%f113, %f103;
	mul.ftz.f32 	%f114, %f100, %f113;
	mov.f32 	%f115, 0f3f800000;   	// 1
	mul.ftz.f32 	%f116, %f100, %f113;
	sub.ftz.f32 	%f117, %f115, %f116;
	mul.ftz.f32 	%f118, %f112, %f63;
	fma.rn.ftz.f32 	%f119, %f63, %f16, %f118;
	mul.ftz.f32 	%f120, %f114, %f119;
	fma.rn.ftz.f32 	%f110, %f10, %f117, %f120;
	mul.ftz.f32 	%f121, %f112, %f65;
	fma.rn.ftz.f32 	%f122, %f65, %f16, %f121;
	mul.ftz.f32 	%f123, %f114, %f122;
	fma.rn.ftz.f32 	%f109, %f12, %f117, %f123;
	mul.ftz.f32 	%f124, %f112, %f67;
	fma.rn.ftz.f32 	%f125, %f67, %f16, %f124;
	mul.ftz.f32 	%f126, %f114, %f125;
	fma.rn.ftz.f32 	%f108, %f14, %f117, %f126;
$Lt_70_25346:
	.loc	22	497	0
	mov.f32 	%f127, %f110;
	st.param.f32 	[__cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+0], %f127;
	mov.f32 	%f128, %f109;
	st.param.f32 	[__cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+4], %f128;
	mov.f32 	%f129, %f108;
	st.param.f32 	[__cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+8], %f129;
	mov.f32 	%f130, %f104;
	st.param.f32 	[__cudaretf__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi+12], %f130;
	ret;
$LDWend__Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi:
	} // _Z34BlendMode_PixelFn_IR_BlendMode_Hue8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_Saturation(PixelRGB, PixelRGB, float, int)
	// (signature as encoded in the mangled name); returns a 16-byte, 4 x f32 pixel.
	//
	// Inputs, as loaded at the top of the body:
	//   param 1 ("A"): f32 at byte offsets 0/4/8/12 -> %f2, %f4, %f6, %f8
	//   param 2 ("B"): f32 at byte offsets 0/4/8/12 -> %f10, %f12, %f14, %f16
	//   param 3      : f32 -> %f18, only ever used multiplied with A@12
	//   param 4      : s32, never read anywhere in this body
	// NOTE(review): offsets 0/4/8 look like colour channels and offset 12 like
	// alpha/coverage, with A as the blend layer and B as the backdrop -- confirm
	// against the CUDA source; the channel order is not visible from here.
	//
	// Outline:
	//   1. s = saturate(max(A@0,A@4,A@8) - min(A@0,A@4,A@8)).
	//   2. Build T = (%f32, %f33, %f37) for offsets (0, 4, 8): B's largest channel
	//      becomes s, its smallest becomes 0 and the middle one is
	//      s * (mid - min) / (max - min). If the guarding compare fails (max not
	//      strictly greater than min) the affected channels are set to 0.
	//   3. Add d = saturate(luma(B)) - saturate(luma(T)) to every channel of T, with
	//      luma(x) = k[+8]*x@0 + k[+4]*x@4 + k[+0]*x@8, k = kRGB32f_To_601YPbPr.
	//   4. Clip the shifted colour towards its own saturated luma when its minimum
	//      is below 0 and/or its maximum is above 1.
	//   5. Composite the clipped colour with B using the weights derived from
	//      %f18 * A@12 and B@12, and store the four results to the return param.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi)
	{
	.reg .f32 %f<132>;
	.reg .pred %p<20>;
	.loc	22	500	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi:
	// Unpack A (param 1) into %f2/%f4/%f6/%f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack B (param 2) into %f10/%f12/%f14/%f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Param 3 scale factor.
	ld.param.f32 	%f17, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	154	0
	// %f21 = max(B@0, B@4, B@8), %f22 = min(B@0, B@4, B@8).
	// %p1 (B@0 < B@4) is reused by the last ordering case further down.
	setp.lt.ftz.f32 	%p1, %f10, %f12;
	max.ftz.f32 	%f19, %f10, %f12;
	selp.f32 	%f20, %f10, %f12, %p1;
	max.ftz.f32 	%f21, %f19, %f14;
	setp.lt.ftz.f32 	%p2, %f20, %f14;
	selp.f32 	%f22, %f20, %f14, %p2;
	// Dispatch on which channel of B holds the minimum / maximum.
	// %p3: the minimum is B@8.
	setp.eq.ftz.f32 	%p3, %f22, %f14;
	@!%p3 bra 	$Lt_71_18946;
	// %p4: the maximum is B@4.
	setp.eq.ftz.f32 	%p4, %f21, %f12;
	@!%p4 bra 	$Lt_71_19458;
	// Case min = B@8, max = B@4; only proceed when max > min so the divisor is non-zero.
	setp.gt.ftz.f32 	%p5, %f12, %f14;
	@!%p5 bra 	$Lt_71_19970;
	.loc	22	161	0
	// %f28 = s = saturate(max(A) - min(A)); the same sequence is repeated in every case.
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@0 (middle) = s * (B@0 - B@8) / (B@4 - B@8); T@4 (max) = s.
	sub.ftz.f32 	%f29, %f10, %f14;
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f12, %f14;
	div.approx.ftz.f32 	%f32, %f30, %f31;
	.loc	22	162	0
	mov.f32 	%f33, %f28;
	bra.uni 	$Lt_71_20226;
$Lt_71_19970:
	.loc	22	166	0
	// max is not greater than min: T@0 = T@4 = 0.
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_71_20226;
$Lt_71_19458:
	// Case min = B@8 and max is not B@4; B@0 is treated as the maximum.
	setp.gt.ftz.f32 	%p8, %f10, %f14;
	@!%p8 bra 	$Lt_71_20482;
	.loc	22	173	0
	// s = saturate(max(A) - min(A)).
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@4 (middle) = s * (B@4 - B@8) / (B@0 - B@8); T@0 (max) = s.
	sub.ftz.f32 	%f34, %f12, %f14;
	mul.ftz.f32 	%f35, %f28, %f34;
	sub.ftz.f32 	%f36, %f10, %f14;
	div.approx.ftz.f32 	%f33, %f35, %f36;
	.loc	22	174	0
	mov.f32 	%f32, %f28;
	bra.uni 	$Lt_71_20226;
$Lt_71_20482:
	.loc	22	178	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_71_20226:
$Lt_71_19202:
	// Common tail for both "min = B@8" cases: the minimum channel T@8 becomes 0.
	mov.f32 	%f37, 0f00000000;    	// 0
	bra.uni 	$Lt_71_22786;
$Lt_71_18946:
	// The minimum is not B@8. %p9: minimum is B@4; %p10: maximum is B@8.
	setp.eq.ftz.f32 	%p9, %f22, %f12;
	setp.eq.ftz.f32 	%p10, %f21, %f14;
	@!%p10 bra 	$Lt_71_20994;
	@!%p9 bra 	$Lt_71_21506;
	// Case max = B@8, min = B@4.
	setp.lt.ftz.f32 	%p11, %f12, %f14;
	@!%p11 bra 	$Lt_71_22018;
	.loc	22	191	0
	// s = saturate(max(A) - min(A)).
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@0 (middle) = s * (B@0 - B@4) / (B@8 - B@4); T@8 (max) = s.
	sub.ftz.f32 	%f38, %f10, %f12;
	mul.ftz.f32 	%f39, %f28, %f38;
	sub.ftz.f32 	%f40, %f14, %f12;
	div.approx.ftz.f32 	%f32, %f39, %f40;
	.loc	22	192	0
	mov.f32 	%f37, %f28;
	bra.uni 	$Lt_71_21762;
$Lt_71_22018:
	.loc	22	196	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f37, 0f00000000;    	// 0
$Lt_71_21762:
	// Minimum channel T@4 becomes 0.
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_71_22786;
$Lt_71_21506:
	// Case max = B@8 and min is neither B@8 nor B@4; B@0 is treated as the minimum.
	setp.lt.ftz.f32 	%p12, %f10, %f14;
	@!%p12 bra 	$Lt_71_22530;
	.loc	22	204	0
	// s = saturate(max(A) - min(A)).
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@4 (middle) = s * (B@4 - B@0) / (B@8 - B@0); T@8 (max) = s.
	sub.ftz.f32 	%f41, %f12, %f10;
	mul.ftz.f32 	%f42, %f28, %f41;
	sub.ftz.f32 	%f43, %f14, %f10;
	div.approx.ftz.f32 	%f33, %f42, %f43;
	.loc	22	205	0
	mov.f32 	%f37, %f28;
	bra.uni 	$Lt_71_22274;
$Lt_71_22530:
	.loc	22	209	0
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_71_22274:
	.loc	22	211	0
	// Minimum channel T@0 becomes 0.
	mov.f32 	%f32, 0f00000000;    	// 0
	bra.uni 	$Lt_71_22786;
$Lt_71_20994:
	// Neither the minimum nor the maximum is B@8, so B@8 is the middle channel.
	@!%p9 bra 	$Lt_71_23042;
	// Case min = B@4, max = B@0.
	setp.gt.ftz.f32 	%p13, %f10, %f12;
	@!%p13 bra 	$Lt_71_23554;
	.loc	22	220	0
	// s = saturate(max(A) - min(A)).
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@8 (middle) = s * (B@8 - B@4) / (B@0 - B@4); T@0 (max) = s.
	sub.ftz.f32 	%f44, %f14, %f12;
	mul.ftz.f32 	%f45, %f28, %f44;
	sub.ftz.f32 	%f46, %f10, %f12;
	div.approx.ftz.f32 	%f37, %f45, %f46;
	.loc	22	221	0
	mov.f32 	%f32, %f28;
	bra.uni 	$Lt_71_23298;
$Lt_71_23554:
	.loc	22	225	0
	mov.f32 	%f32, 0f00000000;    	// 0
	mov.f32 	%f37, 0f00000000;    	// 0
$Lt_71_23298:
	// Minimum channel T@4 becomes 0.
	mov.f32 	%f33, 0f00000000;    	// 0
	bra.uni 	$Lt_71_22786;
$Lt_71_23042:
	// Remaining case: B@0 is treated as the minimum and B@4 as the maximum.
	// The guard is %p1 (B@0 < B@4) computed at function entry.
	@!%p1 bra 	$Lt_71_24066;
	.loc	22	233	0
	// s = saturate(max(A) - min(A)).
	max.ftz.f32 	%f23, %f2, %f4;
	setp.lt.ftz.f32 	%p6, %f2, %f4;
	max.ftz.f32 	%f24, %f23, %f6;
	selp.f32 	%f25, %f2, %f4, %p6;
	setp.lt.ftz.f32 	%p7, %f25, %f6;
	selp.f32 	%f26, %f25, %f6, %p7;
	sub.ftz.f32 	%f27, %f24, %f26;
	cvt.ftz.sat.f32.f32 	%f28, %f27;
	// T@8 (middle) = s * (B@8 - B@0) / (B@4 - B@0); T@4 (max) = s.
	sub.ftz.f32 	%f47, %f14, %f10;
	mul.ftz.f32 	%f48, %f28, %f47;
	sub.ftz.f32 	%f49, %f12, %f10;
	div.approx.ftz.f32 	%f37, %f48, %f49;
	.loc	22	234	0
	mov.f32 	%f33, %f28;
	bra.uni 	$Lt_71_23810;
$Lt_71_24066:
	.loc	22	238	0
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f33, 0f00000000;    	// 0
$Lt_71_23810:
	.loc	22	240	0
	// Minimum channel T@0 becomes 0.
	mov.f32 	%f32, 0f00000000;    	// 0
$Lt_71_22786:
$Lt_71_20738:
$Lt_71_18690:
	.loc	22	113	0
	// All cases join here with T = (%f32, %f33, %f37) for offsets (0, 4, 8).
	// %f57 = luma(B) and %f58 = luma(T), interleaved, where
	// luma(x) = k[+8]*x@0 + k[+4]*x@4 + k[+0]*x@8.
	ld.const.f32 	%f50, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f51, %f12, %f50;
	mul.ftz.f32 	%f52, %f50, %f33;
	ld.const.f32 	%f53, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f54, %f53, %f14, %f51;
	fma.rn.ftz.f32 	%f55, %f53, %f37, %f52;
	ld.const.f32 	%f56, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f57, %f56, %f10, %f54;
	fma.rn.ftz.f32 	%f58, %f56, %f32, %f55;
	// d = saturate(luma(B)) - saturate(luma(T)); shifted colour = T + d.
	// %f62/%f64/%f66 keep the unclipped values; %f63/%f65/%f67 are the working
	// copies that the two clip stages below may overwrite.
	cvt.ftz.sat.f32.f32 	%f59, %f57;
	cvt.ftz.sat.f32.f32 	%f60, %f58;
	sub.ftz.f32 	%f61, %f59, %f60;
	add.ftz.f32 	%f62, %f61, %f32;
	mov.f32 	%f63, %f62;
	add.ftz.f32 	%f64, %f61, %f33;
	mov.f32 	%f65, %f64;
	add.ftz.f32 	%f66, %f61, %f37;
	mov.f32 	%f67, %f66;
	.loc	22	50	0
	// %f71 = L = saturate(luma(shifted colour)).
	mul.ftz.f32 	%f68, %f64, %f50;
	fma.rn.ftz.f32 	%f69, %f53, %f66, %f68;
	fma.rn.ftz.f32 	%f70, %f56, %f62, %f69;
	cvt.ftz.sat.f32.f32 	%f71, %f70;
	.loc	22	116	0
	// %f73 = min of the three shifted channels; clip only when it is negative.
	setp.gt.ftz.f32 	%p14, %f64, %f62;
	selp.f32 	%f72, %f62, %f64, %p14;
	setp.lt.ftz.f32 	%p15, %f72, %f66;
	selp.f32 	%f73, %f72, %f66, %p15;
	mov.f32 	%f74, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p16, %f73, %f74;
	@!%p16 bra 	$Lt_71_24322;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - min).
	sub.ftz.f32 	%f75, %f71, %f73;
	sub.ftz.f32 	%f76, %f66, %f71;
	mul.ftz.f32 	%f77, %f71, %f76;
	div.approx.ftz.f32 	%f78, %f77, %f75;
	add.ftz.f32 	%f67, %f71, %f78;
	.loc	22	120	0
	sub.ftz.f32 	%f79, %f64, %f71;
	mul.ftz.f32 	%f80, %f71, %f79;
	div.approx.ftz.f32 	%f81, %f80, %f75;
	add.ftz.f32 	%f65, %f71, %f81;
	.loc	22	121	0
	sub.ftz.f32 	%f82, %f62, %f71;
	mul.ftz.f32 	%f83, %f71, %f82;
	div.approx.ftz.f32 	%f84, %f83, %f75;
	add.ftz.f32 	%f63, %f71, %f84;
$Lt_71_24322:
	// %f86 = max of the three *unclipped* shifted channels; clip only when it exceeds 1.
	max.ftz.f32 	%f85, %f64, %f62;
	max.ftz.f32 	%f86, %f85, %f66;
	mov.f32 	%f87, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p17, %f86, %f87;
	@!%p17 bra 	$Lt_71_24834;
	.loc	27	529	0
	// High clip, per channel: c = L + (1 - L) * (c - L) / (max - L), applied to the
	// working copies (which may already carry the low-clip result).
	mov.f32 	%f88, 0f3f800000;    	// 1
	sub.ftz.f32 	%f89, %f88, %f71;
	sub.ftz.f32 	%f90, %f86, %f71;
	sub.ftz.f32 	%f91, %f67, %f71;
	mul.ftz.f32 	%f92, %f89, %f91;
	div.approx.ftz.f32 	%f93, %f92, %f90;
	.loc	22	125	0
	add.ftz.f32 	%f67, %f93, %f71;
	.loc	27	529	0
	sub.ftz.f32 	%f94, %f65, %f71;
	mul.ftz.f32 	%f95, %f89, %f94;
	div.approx.ftz.f32 	%f96, %f95, %f90;
	.loc	22	126	0
	add.ftz.f32 	%f65, %f96, %f71;
	.loc	27	529	0
	sub.ftz.f32 	%f97, %f63, %f71;
	mul.ftz.f32 	%f98, %f89, %f97;
	div.approx.ftz.f32 	%f99, %f98, %f90;
	.loc	22	127	0
	add.ftz.f32 	%f63, %f99, %f71;
$Lt_71_24834:
	.loc	22	468	0
	// Composite. a = %f18 * A@12, b = B@12.
	// %f103 = a + b - a*b; it is also the value returned at offset 12 (%f104).
	mul.ftz.f32 	%f100, %f18, %f8;
	add.ftz.f32 	%f101, %f100, %f16;
	mul.ftz.f32 	%f102, %f100, %f16;
	sub.ftz.f32 	%f103, %f101, %f102;
	mov.f32 	%f104, %f103;
	// If (a + b - a*b) - 8e-6 <= 0, return an all-zero pixel and skip the reciprocal.
	mov.f32 	%f105, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f106, %f103, %f105;
	mov.f32 	%f107, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p18, %f106, %f107;
	@!%p18 bra 	$Lt_71_25602;
	mov.f32 	%f108, 0f00000000;   	// 0
	mov.f32 	%f109, 0f00000000;   	// 0
	mov.f32 	%f110, 0f00000000;   	// 0
	mov.f32 	%f104, 0f00000000;   	// 0
	bra.uni 	$Lt_71_25346;
$Lt_71_25602:
	// w = a / (a + b - a*b), computed via an approximate reciprocal (%f114; %f116
	// is the same product again). Per channel:
	//   out = B * (1 - w) + w * ((1 - b) * C + b * C)
	// where C is the clipped channel (%f63 / %f65 / %f67); both inner terms use the same C.
	mov.f32 	%f111, 0f3f800000;   	// 1
	sub.ftz.f32 	%f112, %f111, %f16;
	rcp.approx.ftz.f32 	%f113, %f103;
	mul.ftz.f32 	%f114, %f100, %f113;
	mov.f32 	%f115, 0f3f800000;   	// 1
	mul.ftz.f32 	%f116, %f100, %f113;
	sub.ftz.f32 	%f117, %f115, %f116;
	// Offset-0 channel -> %f110.
	mul.ftz.f32 	%f118, %f112, %f63;
	fma.rn.ftz.f32 	%f119, %f63, %f16, %f118;
	mul.ftz.f32 	%f120, %f114, %f119;
	fma.rn.ftz.f32 	%f110, %f10, %f117, %f120;
	// Offset-4 channel -> %f109.
	mul.ftz.f32 	%f121, %f112, %f65;
	fma.rn.ftz.f32 	%f122, %f65, %f16, %f121;
	mul.ftz.f32 	%f123, %f114, %f122;
	fma.rn.ftz.f32 	%f109, %f12, %f117, %f123;
	// Offset-8 channel -> %f108.
	mul.ftz.f32 	%f124, %f112, %f67;
	fma.rn.ftz.f32 	%f125, %f67, %f16, %f124;
	mul.ftz.f32 	%f126, %f114, %f125;
	fma.rn.ftz.f32 	%f108, %f14, %f117, %f126;
$Lt_71_25346:
	.loc	22	504	0
	// Store the result pixel: offsets 0/4/8 = composited channels, offset 12 = %f104.
	mov.f32 	%f127, %f110;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+0], %f127;
	mov.f32 	%f128, %f109;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+4], %f128;
	mov.f32 	%f129, %f108;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+8], %f129;
	mov.f32 	%f130, %f104;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi+12], %f130;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_Saturation8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_Color(PixelRGB, PixelRGB, float, int)
	// (signature as encoded in the mangled name); returns a 16-byte, 4 x f32 pixel.
	//
	// Inputs, as loaded at the top of the body:
	//   param 1 ("A"): f32 at byte offsets 0/4/8/12 -> %f2, %f4, %f6, %f8
	//   param 2 ("B"): f32 at byte offsets 0/4/8/12 -> %f10, %f12, %f14, %f16
	//   param 3      : f32 -> %f18, only ever used multiplied with A@12
	//   param 4      : s32, never read anywhere in this body
	// NOTE(review): offsets 0/4/8 look like colour channels and offset 12 like
	// alpha/coverage, with A as the blend layer and B as the backdrop -- confirm
	// against the CUDA source; the channel order is not visible from here.
	//
	// Outline:
	//   1. d = saturate(luma(B)) - saturate(luma(A)), with
	//      luma(x) = k[+8]*x@0 + k[+4]*x@4 + k[+0]*x@8, k = kRGB32f_To_601YPbPr.
	//   2. Shifted colour = A + d on offsets 0/4/8.
	//   3. Clip the shifted colour towards its own saturated luma when its minimum
	//      is below 0 and/or its maximum is above 1.
	//   4. Composite the clipped colour with B using the weights derived from
	//      %f18 * A@12 and B@12, and store the four results to the return param.
	.visible .func (.param .align 16 .b8 __cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16]) _Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi, .param .s32 __cudaparmf4__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi)
	{
	.reg .f32 %f<101>;
	.reg .pred %p<7>;
	.loc	22	507	0
$LDWbegin__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi:
	// Unpack A (param 1) into %f2/%f4/%f6/%f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack B (param 2) into %f10/%f12/%f14/%f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Param 3 scale factor.
	ld.param.f32 	%f17, [__cudaparmf3__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	113	0
	// %f26 = luma(B) and %f27 = luma(A), interleaved.
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f12, %f19;
	mul.ftz.f32 	%f21, %f4, %f19;
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f23, %f22, %f14, %f20;
	fma.rn.ftz.f32 	%f24, %f22, %f6, %f21;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f26, %f25, %f10, %f23;
	fma.rn.ftz.f32 	%f27, %f25, %f2, %f24;
	// d = saturate(luma(B)) - saturate(luma(A)); shifted colour = A + d.
	// %f31/%f33/%f35 keep the unclipped values; %f32/%f34/%f36 are the working
	// copies that the two clip stages below may overwrite.
	cvt.ftz.sat.f32.f32 	%f28, %f26;
	cvt.ftz.sat.f32.f32 	%f29, %f27;
	sub.ftz.f32 	%f30, %f28, %f29;
	add.ftz.f32 	%f31, %f30, %f2;
	mov.f32 	%f32, %f31;
	add.ftz.f32 	%f33, %f30, %f4;
	mov.f32 	%f34, %f33;
	add.ftz.f32 	%f35, %f30, %f6;
	mov.f32 	%f36, %f35;
	.loc	22	50	0
	// %f40 = L = saturate(luma(shifted colour)).
	mul.ftz.f32 	%f37, %f33, %f19;
	fma.rn.ftz.f32 	%f38, %f22, %f35, %f37;
	fma.rn.ftz.f32 	%f39, %f25, %f31, %f38;
	cvt.ftz.sat.f32.f32 	%f40, %f39;
	.loc	22	116	0
	// %f42 = min of the three shifted channels; clip only when it is negative.
	setp.gt.ftz.f32 	%p1, %f33, %f31;
	selp.f32 	%f41, %f31, %f33, %p1;
	setp.lt.ftz.f32 	%p2, %f41, %f35;
	selp.f32 	%f42, %f41, %f35, %p2;
	mov.f32 	%f43, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f42, %f43;
	@!%p3 bra 	$Lt_72_5122;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - min).
	sub.ftz.f32 	%f44, %f40, %f42;
	sub.ftz.f32 	%f45, %f35, %f40;
	mul.ftz.f32 	%f46, %f40, %f45;
	div.approx.ftz.f32 	%f47, %f46, %f44;
	add.ftz.f32 	%f36, %f40, %f47;
	.loc	22	120	0
	sub.ftz.f32 	%f48, %f33, %f40;
	mul.ftz.f32 	%f49, %f40, %f48;
	div.approx.ftz.f32 	%f50, %f49, %f44;
	add.ftz.f32 	%f34, %f40, %f50;
	.loc	22	121	0
	sub.ftz.f32 	%f51, %f31, %f40;
	mul.ftz.f32 	%f52, %f40, %f51;
	div.approx.ftz.f32 	%f53, %f52, %f44;
	add.ftz.f32 	%f32, %f40, %f53;
$Lt_72_5122:
	// %f55 = max of the three *unclipped* shifted channels; clip only when it exceeds 1.
	max.ftz.f32 	%f54, %f33, %f31;
	max.ftz.f32 	%f55, %f54, %f35;
	mov.f32 	%f56, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p4, %f55, %f56;
	@!%p4 bra 	$Lt_72_5634;
	.loc	27	529	0
	// High clip, per channel: c = L + (1 - L) * (c - L) / (max - L), applied to the
	// working copies (which may already carry the low-clip result).
	mov.f32 	%f57, 0f3f800000;    	// 1
	sub.ftz.f32 	%f58, %f57, %f40;
	sub.ftz.f32 	%f59, %f55, %f40;
	sub.ftz.f32 	%f60, %f36, %f40;
	mul.ftz.f32 	%f61, %f58, %f60;
	div.approx.ftz.f32 	%f62, %f61, %f59;
	.loc	22	125	0
	add.ftz.f32 	%f36, %f62, %f40;
	.loc	27	529	0
	sub.ftz.f32 	%f63, %f34, %f40;
	mul.ftz.f32 	%f64, %f58, %f63;
	div.approx.ftz.f32 	%f65, %f64, %f59;
	.loc	22	126	0
	add.ftz.f32 	%f34, %f65, %f40;
	.loc	27	529	0
	sub.ftz.f32 	%f66, %f32, %f40;
	mul.ftz.f32 	%f67, %f58, %f66;
	div.approx.ftz.f32 	%f68, %f67, %f59;
	.loc	22	127	0
	add.ftz.f32 	%f32, %f68, %f40;
$Lt_72_5634:
	.loc	22	468	0
	// Composite. a = %f18 * A@12, b = B@12.
	// %f72 = a + b - a*b; it is also the value returned at offset 12 (%f73).
	mul.ftz.f32 	%f69, %f18, %f8;
	add.ftz.f32 	%f70, %f69, %f16;
	mul.ftz.f32 	%f71, %f69, %f16;
	sub.ftz.f32 	%f72, %f70, %f71;
	mov.f32 	%f73, %f72;
	// If (a + b - a*b) - 8e-6 <= 0, return an all-zero pixel and skip the reciprocal.
	mov.f32 	%f74, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f75, %f72, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p5, %f75, %f76;
	@!%p5 bra 	$Lt_72_6402;
	mov.f32 	%f77, 0f00000000;    	// 0
	mov.f32 	%f78, 0f00000000;    	// 0
	mov.f32 	%f79, 0f00000000;    	// 0
	mov.f32 	%f73, 0f00000000;    	// 0
	bra.uni 	$Lt_72_6146;
$Lt_72_6402:
	// w = a / (a + b - a*b), computed via an approximate reciprocal (%f83; %f85
	// is the same product again). Per channel:
	//   out = B * (1 - w) + w * ((1 - b) * C + b * C)
	// where C is the clipped channel (%f32 / %f34 / %f36); both inner terms use the same C.
	mov.f32 	%f80, 0f3f800000;    	// 1
	sub.ftz.f32 	%f81, %f80, %f16;
	rcp.approx.ftz.f32 	%f82, %f72;
	mul.ftz.f32 	%f83, %f69, %f82;
	mov.f32 	%f84, 0f3f800000;    	// 1
	mul.ftz.f32 	%f85, %f69, %f82;
	sub.ftz.f32 	%f86, %f84, %f85;
	// Offset-0 channel -> %f79.
	mul.ftz.f32 	%f87, %f81, %f32;
	fma.rn.ftz.f32 	%f88, %f32, %f16, %f87;
	mul.ftz.f32 	%f89, %f83, %f88;
	fma.rn.ftz.f32 	%f79, %f10, %f86, %f89;
	// Offset-4 channel -> %f78.
	mul.ftz.f32 	%f90, %f81, %f34;
	fma.rn.ftz.f32 	%f91, %f34, %f16, %f90;
	mul.ftz.f32 	%f92, %f83, %f91;
	fma.rn.ftz.f32 	%f78, %f12, %f86, %f92;
	// Offset-8 channel -> %f77.
	mul.ftz.f32 	%f93, %f81, %f36;
	fma.rn.ftz.f32 	%f94, %f36, %f16, %f93;
	mul.ftz.f32 	%f95, %f83, %f94;
	fma.rn.ftz.f32 	%f77, %f14, %f86, %f95;
$Lt_72_6146:
	.loc	22	510	0
	// Store the result pixel: offsets 0/4/8 = composited channels, offset 12 = %f73.
	mov.f32 	%f96, %f79;
	st.param.f32 	[__cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+0], %f96;
	mov.f32 	%f97, %f78;
	st.param.f32 	[__cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+4], %f97;
	mov.f32 	%f98, %f77;
	st.param.f32 	[__cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+8], %f98;
	mov.f32 	%f99, %f73;
	st.param.f32 	[__cudaretf__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi+12], %f99;
	ret;
$LDWend__Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi:
	} // _Z36BlendMode_PixelFn_IR_BlendMode_Color8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_Luminosity(PixelRGB, PixelRGB, float, int)
	// (signature as encoded in the mangled name); returns a 16-byte, 4 x f32 pixel.
	//
	// Inputs, as loaded at the top of the body:
	//   param 1 ("A"): f32 at byte offsets 0/4/8/12 -> %f2, %f4, %f6, %f8
	//   param 2 ("B"): f32 at byte offsets 0/4/8/12 -> %f10, %f12, %f14, %f16
	//   param 3      : f32 -> %f18, only ever used multiplied with A@12
	//   param 4      : s32, never read anywhere in this body
	// NOTE(review): offsets 0/4/8 look like colour channels and offset 12 like
	// alpha/coverage, with A as the blend layer and B as the backdrop -- confirm
	// against the CUDA source; the channel order is not visible from here.
	//
	// Outline:
	//   1. d = saturate(luma(A)) - saturate(luma(B)), with
	//      luma(x) = k[+8]*x@0 + k[+4]*x@4 + k[+0]*x@8, k = kRGB32f_To_601YPbPr.
	//   2. Shifted colour = B + d on offsets 0/4/8.
	//   3. Clip the shifted colour towards its own saturated luma when its minimum
	//      is below 0 and/or its maximum is above 1.
	//   4. Composite the clipped colour with B using the weights derived from
	//      %f18 * A@12 and B@12, and store the four results to the return param.
	.visible .func (.param .align 16 .b8 __cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16]) _Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi, .param .s32 __cudaparmf4__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi)
	{
	.reg .f32 %f<101>;
	.reg .pred %p<7>;
	.loc	22	513	0
$LDWbegin__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi:
	// Unpack A (param 1) into %f2/%f4/%f6/%f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack B (param 2) into %f10/%f12/%f14/%f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Param 3 scale factor.
	ld.param.f32 	%f17, [__cudaparmf3__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	113	0
	// %f26 = luma(A) and %f27 = luma(B), interleaved.
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f20, %f4, %f19;
	mul.ftz.f32 	%f21, %f12, %f19;
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f23, %f22, %f6, %f20;
	fma.rn.ftz.f32 	%f24, %f22, %f14, %f21;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f26, %f25, %f2, %f23;
	fma.rn.ftz.f32 	%f27, %f25, %f10, %f24;
	// d = saturate(luma(A)) - saturate(luma(B)); shifted colour = B + d.
	// %f31/%f33/%f35 keep the unclipped values; %f32/%f34/%f36 are the working
	// copies that the two clip stages below may overwrite.
	cvt.ftz.sat.f32.f32 	%f28, %f26;
	cvt.ftz.sat.f32.f32 	%f29, %f27;
	sub.ftz.f32 	%f30, %f28, %f29;
	add.ftz.f32 	%f31, %f30, %f10;
	mov.f32 	%f32, %f31;
	add.ftz.f32 	%f33, %f30, %f12;
	mov.f32 	%f34, %f33;
	add.ftz.f32 	%f35, %f30, %f14;
	mov.f32 	%f36, %f35;
	.loc	22	50	0
	// %f40 = L = saturate(luma(shifted colour)).
	mul.ftz.f32 	%f37, %f33, %f19;
	fma.rn.ftz.f32 	%f38, %f22, %f35, %f37;
	fma.rn.ftz.f32 	%f39, %f25, %f31, %f38;
	cvt.ftz.sat.f32.f32 	%f40, %f39;
	.loc	22	116	0
	// %f42 = min of the three shifted channels; clip only when it is negative.
	setp.gt.ftz.f32 	%p1, %f33, %f31;
	selp.f32 	%f41, %f31, %f33, %p1;
	setp.lt.ftz.f32 	%p2, %f41, %f35;
	selp.f32 	%f42, %f41, %f35, %p2;
	mov.f32 	%f43, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p3, %f42, %f43;
	@!%p3 bra 	$Lt_73_5122;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - min).
	sub.ftz.f32 	%f44, %f40, %f42;
	sub.ftz.f32 	%f45, %f35, %f40;
	mul.ftz.f32 	%f46, %f40, %f45;
	div.approx.ftz.f32 	%f47, %f46, %f44;
	add.ftz.f32 	%f36, %f40, %f47;
	.loc	22	120	0
	sub.ftz.f32 	%f48, %f33, %f40;
	mul.ftz.f32 	%f49, %f40, %f48;
	div.approx.ftz.f32 	%f50, %f49, %f44;
	add.ftz.f32 	%f34, %f40, %f50;
	.loc	22	121	0
	sub.ftz.f32 	%f51, %f31, %f40;
	mul.ftz.f32 	%f52, %f40, %f51;
	div.approx.ftz.f32 	%f53, %f52, %f44;
	add.ftz.f32 	%f32, %f40, %f53;
$Lt_73_5122:
	// %f55 = max of the three *unclipped* shifted channels; clip only when it exceeds 1.
	max.ftz.f32 	%f54, %f33, %f31;
	max.ftz.f32 	%f55, %f54, %f35;
	mov.f32 	%f56, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p4, %f55, %f56;
	@!%p4 bra 	$Lt_73_5634;
	.loc	27	529	0
	// High clip, per channel: c = L + (1 - L) * (c - L) / (max - L), applied to the
	// working copies (which may already carry the low-clip result).
	mov.f32 	%f57, 0f3f800000;    	// 1
	sub.ftz.f32 	%f58, %f57, %f40;
	sub.ftz.f32 	%f59, %f55, %f40;
	sub.ftz.f32 	%f60, %f36, %f40;
	mul.ftz.f32 	%f61, %f58, %f60;
	div.approx.ftz.f32 	%f62, %f61, %f59;
	.loc	22	125	0
	add.ftz.f32 	%f36, %f62, %f40;
	.loc	27	529	0
	sub.ftz.f32 	%f63, %f34, %f40;
	mul.ftz.f32 	%f64, %f58, %f63;
	div.approx.ftz.f32 	%f65, %f64, %f59;
	.loc	22	126	0
	add.ftz.f32 	%f34, %f65, %f40;
	.loc	27	529	0
	sub.ftz.f32 	%f66, %f32, %f40;
	mul.ftz.f32 	%f67, %f58, %f66;
	div.approx.ftz.f32 	%f68, %f67, %f59;
	.loc	22	127	0
	add.ftz.f32 	%f32, %f68, %f40;
$Lt_73_5634:
	.loc	22	468	0
	// Composite. a = %f18 * A@12, b = B@12.
	// %f72 = a + b - a*b; it is also the value returned at offset 12 (%f73).
	mul.ftz.f32 	%f69, %f18, %f8;
	add.ftz.f32 	%f70, %f69, %f16;
	mul.ftz.f32 	%f71, %f69, %f16;
	sub.ftz.f32 	%f72, %f70, %f71;
	mov.f32 	%f73, %f72;
	// If (a + b - a*b) - 8e-6 <= 0, return an all-zero pixel and skip the reciprocal.
	mov.f32 	%f74, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f75, %f72, %f74;
	mov.f32 	%f76, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p5, %f75, %f76;
	@!%p5 bra 	$Lt_73_6402;
	mov.f32 	%f77, 0f00000000;    	// 0
	mov.f32 	%f78, 0f00000000;    	// 0
	mov.f32 	%f79, 0f00000000;    	// 0
	mov.f32 	%f73, 0f00000000;    	// 0
	bra.uni 	$Lt_73_6146;
$Lt_73_6402:
	// w = a / (a + b - a*b), computed via an approximate reciprocal (%f83; %f85
	// is the same product again). Per channel:
	//   out = B * (1 - w) + w * ((1 - b) * C + b * C)
	// where C is the clipped channel (%f32 / %f34 / %f36); both inner terms use the same C.
	mov.f32 	%f80, 0f3f800000;    	// 1
	sub.ftz.f32 	%f81, %f80, %f16;
	rcp.approx.ftz.f32 	%f82, %f72;
	mul.ftz.f32 	%f83, %f69, %f82;
	mov.f32 	%f84, 0f3f800000;    	// 1
	mul.ftz.f32 	%f85, %f69, %f82;
	sub.ftz.f32 	%f86, %f84, %f85;
	// Offset-0 channel -> %f79.
	mul.ftz.f32 	%f87, %f81, %f32;
	fma.rn.ftz.f32 	%f88, %f32, %f16, %f87;
	mul.ftz.f32 	%f89, %f83, %f88;
	fma.rn.ftz.f32 	%f79, %f10, %f86, %f89;
	// Offset-4 channel -> %f78.
	mul.ftz.f32 	%f90, %f81, %f34;
	fma.rn.ftz.f32 	%f91, %f34, %f16, %f90;
	mul.ftz.f32 	%f92, %f83, %f91;
	fma.rn.ftz.f32 	%f78, %f12, %f86, %f92;
	// Offset-8 channel -> %f77.
	mul.ftz.f32 	%f93, %f81, %f36;
	fma.rn.ftz.f32 	%f94, %f36, %f16, %f93;
	mul.ftz.f32 	%f95, %f83, %f94;
	fma.rn.ftz.f32 	%f77, %f14, %f86, %f95;
$Lt_73_6146:
	.loc	22	517	0
	// Store the result pixel: offsets 0/4/8 = composited channels, offset 12 = %f73.
	mov.f32 	%f96, %f79;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+0], %f96;
	mov.f32 	%f97, %f78;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+4], %f97;
	mov.f32 	%f98, %f77;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+8], %f98;
	mov.f32 	%f99, %f73;
	st.param.f32 	[__cudaretf__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi+12], %f99;
	ret;
$LDWend__Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi:
	} // _Z41BlendMode_PixelFn_IR_BlendMode_Luminosity8PixelRGBS_fi

	.visible .func (.param .align 16 .b8 __cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16]) _Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi, .param .s32 __cudaparmf4__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi)
	{
	.reg .u32 %r<60>;
	.reg .f32 %f<41>;
	.reg .pred %p<5>;
	.loc	22	524	0
$LDWbegin__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi:
	ld.param.f32 	%f1, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	ld.param.f32 	%f17, [__cudaparmf3__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	526	0
	mul.ftz.f32 	%f19, %f18, %f8;
	mov.f32 	%f20, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f21, %f19, %f20;
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f21, %f22;
	@!%p1 bra 	$Lt_74_1794;
	.loc	22	528	0
	mov.f32 	%f23, %f10;
	mov.f32 	%f24, %f12;
	mov.f32 	%f25, %f14;
	mov.f32 	%f26, %f16;
	bra.uni 	$LBB9__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi;
$Lt_74_1794:
	.loc	22	530	0
	mov.f32 	%f27, 0f370637bd;    	// 8e-006
	add.ftz.f32 	%f28, %f19, %f27;
	mov.f32 	%f29, 0f3f800000;    	// 1
	setp.ge.ftz.f32 	%p2, %f28, %f29;
	@!%p2 bra 	$Lt_74_2050;
	.loc	22	532	0
	mov.f32 	%f23, %f2;
	mov.f32 	%f24, %f4;
	mov.f32 	%f25, %f6;
	mov.f32 	%f26, %f8;
	bra.uni 	$LBB9__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi;
$Lt_74_2050:
	.loc	21	143	0
	cvt.s32.u32 	%r1, %ctaid.y;
	cvt.s32.u32 	%r2, %ntid.y;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.x;
	cvt.s32.u32 	%r5, %ntid.x;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.y;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.x;
	add.u32 	%r10, %r6, %r9;
	shr.u32 	%r11, %r8, 13;
	mov.s32 	%r12, 1;
	sub.s32 	%r13, %r12, %r10;
	sub.u32 	%r14, %r10, %r8;
	sub.u32 	%r15, %r13, %r8;
	xor.b32 	%r16, %r11, %r15;
	shl.b32 	%r17, %r16, 8;
	sub.u32 	%r18, %r14, %r16;
	sub.u32 	%r19, %r8, %r16;
	xor.b32 	%r20, %r17, %r18;
	shr.u32 	%r21, %r20, 13;
	sub.u32 	%r22, %r19, %r20;
	sub.u32 	%r23, %r16, %r20;
	xor.b32 	%r24, %r21, %r22;
	shr.u32 	%r25, %r24, 12;
	sub.u32 	%r26, %r23, %r24;
	xor.b32 	%r27, %r25, %r26;
	sub.u32 	%r28, %r20, %r24;
	sub.u32 	%r29, %r28, %r27;
	shl.b32 	%r30, %r27, 16;
	xor.b32 	%r31, %r29, %r30;
	.loc	21	144	0
	sub.u32 	%r32, %r24, %r27;
	sub.u32 	%r33, %r32, %r31;
	shr.u32 	%r34, %r31, 5;
	xor.b32 	%r35, %r33, %r34;
	.loc	21	145	0
	sub.u32 	%r36, %r27, %r31;
	sub.u32 	%r37, %r36, %r35;
	shr.u32 	%r38, %r35, 3;
	xor.b32 	%r39, %r37, %r38;
	.loc	21	146	0
	sub.u32 	%r40, %r31, %r35;
	sub.u32 	%r41, %r40, %r39;
	shl.b32 	%r42, %r39, 10;
	xor.b32 	%r43, %r41, %r42;
	.loc	21	147	0
	sub.u32 	%r44, %r35, %r39;
	sub.u32 	%r45, %r44, %r43;
	shr.u32 	%r46, %r43, 15;
	xor.b32 	%r47, %r45, %r46;
	.loc	22	537	0
	mov.f32 	%f30, 0f46fffe00;    	// 32767
	mul.ftz.f32 	%f31, %f19, %f30;
	cvt.rzi.ftz.s32.f32 	%r48, %f31;
	mul.lo.u32 	%r49, %r47, 1103515245;
	add.u32 	%r50, %r49, 12345;
	shr.u32 	%r51, %r50, 16;
	and.b32 	%r52, %r51, 255;
	shl.b32 	%r53, %r52, 7;
	mul.lo.u32 	%r54, %r47, -1029531031;
	sub.u32 	%r55, %r54, 740551042;
	shr.u32 	%r56, %r55, 16;
	and.b32 	%r57, %r56, 255;
	xor.b32 	%r58, %r53, %r57;
	setp.lt.s32 	%p3, %r48, %r58;
	@%p3 bra 	$Lt_74_3330;
	mov.f32 	%f32, %f2;
	mov.f32 	%f33, %f4;
	mov.f32 	%f34, %f6;
	mov.f32 	%f35, %f8;
	bra.uni 	$Lt_74_3074;
$Lt_74_3330:
	mov.f32 	%f32, %f10;
	mov.f32 	%f33, %f12;
	mov.f32 	%f34, %f14;
	mov.f32 	%f35, %f16;
$Lt_74_3074:
	mov.f32 	%f23, %f32;
	mov.f32 	%f24, %f33;
	mov.f32 	%f25, %f34;
	mov.f32 	%f26, %f35;
$LBB9__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi:
	mov.f32 	%f36, %f23;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+0], %f36;
	mov.f32 	%f37, %f24;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+4], %f37;
	mov.f32 	%f38, %f25;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+8], %f38;
	mov.f32 	%f39, %f26;
	st.param.f32 	[__cudaretf__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi+12], %f39;
	ret;
$LDWend__Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi:
	} // _Z39BlendMode_PixelFn_IR_BlendMode_Dissolve8PixelRGBS_fi

	// ---------------------------------------------------------------------------
	// DarkerColor pixel function, kRGB32f_To_601YPbPr weights.
	//
	// Inputs (p1 = parmf1, p2 = parmf2; each is four f32 at +0/+4/+8/+12):
	//   parmf1 : 16-byte pixel p1
	//   parmf2 : 16-byte pixel p2
	//   parmf3 : f32 factor multiplied into p1[+12]
	//   parmf4 : s32, declared but never read in this body
	// Returns   : 16-byte pixel (four f32)
	//
	// Behaviour: a weighted sum of the first three components is formed for each
	// pixel and saturated by cvt.sat. If p1's sum is strictly less than p2's,
	// the result is a mix of p1 into p2 (formulas below); otherwise p2 is
	// returned unchanged.
	// NOTE(review): +12 looks like alpha, parmf3 like an opacity, and the weighted
	// sum like luma -- confirm against the CUDA source at .loc file 22, line 551.
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16]) _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi, .param .s32 __cudaparmf4__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi)
	{
	.reg .f32 %f<66>;
	.reg .pred %p<4>;
	.loc	22	551	0
$LDWbegin__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	// Unpack p1: +0 -> %f2, +4 -> %f4, +8 -> %f6, +12 -> %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack p2: +0 -> %f10, +4 -> %f12, +8 -> %f14, +12 -> %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// parmf3 -> %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	555	0
	// Weighted sums, T = kRGB32f_To_601YPbPr (first three floats):
	//   sum(p) = T[+8]*p[+0] + T[+0]*p[+8] + T[+4]*p[+4], then saturated.
	// Note the crossed pairing: component +0 uses weight +8 and vice versa.
	// NOTE(review): presumably the pixel is stored in reverse channel order
	// relative to the weight table -- confirm.
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f20, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f22, %f4, %f21;
	fma.rn.ftz.f32 	%f23, %f20, %f6, %f22;
	fma.rn.ftz.f32 	%f24, %f19, %f2, %f23;
	// %f25 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f12, %f21;
	fma.rn.ftz.f32 	%f27, %f20, %f14, %f26;
	fma.rn.ftz.f32 	%f28, %f19, %f10, %f27;
	// %f29 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f29, %f28;
	// "Darker" test: continue only when sum(p1) < sum(p2); otherwise return p2.
	setp.lt.ftz.f32 	%p1, %f25, %f29;
	@!%p1 bra 	$Lt_75_1282;
	.loc	22	468	0
	// a     = parmf3 * p1[+12]                 (%f30)
	// out12 = a + p2[+12] - a * p2[+12]        (%f33, copied to %f34)
	mul.ftz.f32 	%f30, %f18, %f8;
	add.ftz.f32 	%f31, %f30, %f16;
	mul.ftz.f32 	%f32, %f30, %f16;
	sub.ftz.f32 	%f33, %f31, %f32;
	mov.f32 	%f34, %f33;
	// If out12 - 8e-6 <= 0, treat the result as empty: all four outputs are 0.
	// This also keeps the rcp below away from a near-zero operand.
	mov.f32 	%f35, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f36, %f33, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f36, %f37;
	@!%p2 bra 	$Lt_75_2306;
	mov.f32 	%f38, 0f00000000;    	// 0
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	bra.uni 	$Lt_75_2050;
$Lt_75_2306:
	// w = a * (1 / out12) using the approximate reciprocal   (%f44)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f46 recomputes the same product as %f44; left exactly as the compiler emitted it.
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f16;
	rcp.approx.ftz.f32 	%f43, %f33;
	mul.ftz.f32 	%f44, %f30, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	mul.ftz.f32 	%f46, %f30, %f43;
	sub.ftz.f32 	%f47, %f45, %f46;
	// component +0 -> %f40
	mul.ftz.f32 	%f48, %f42, %f2;
	fma.rn.ftz.f32 	%f49, %f2, %f16, %f48;
	mul.ftz.f32 	%f50, %f44, %f49;
	fma.rn.ftz.f32 	%f40, %f10, %f47, %f50;
	// component +4 -> %f39
	mul.ftz.f32 	%f51, %f42, %f4;
	fma.rn.ftz.f32 	%f52, %f4, %f16, %f51;
	mul.ftz.f32 	%f53, %f44, %f52;
	fma.rn.ftz.f32 	%f39, %f12, %f47, %f53;
	// component +8 -> %f38
	mul.ftz.f32 	%f54, %f42, %f6;
	fma.rn.ftz.f32 	%f55, %f6, %f16, %f54;
	mul.ftz.f32 	%f56, %f44, %f55;
	fma.rn.ftz.f32 	%f38, %f14, %f47, %f56;
$Lt_75_2050:
	.loc	22	557	0
	// Result of the mixed (or zeroed) path: (+0, +4, +8, +12) = (%f40, %f39, %f38, %f34).
	mov.f32 	%f57, %f40;
	mov.f32 	%f58, %f39;
	mov.f32 	%f59, %f38;
	mov.f32 	%f60, %f34;
	bra.uni 	$LBB7__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi;
$Lt_75_1282:
	.loc	22	561	0
	// sum(p1) is not less than sum(p2): return p2 unchanged.
	mov.f32 	%f57, %f10;
	mov.f32 	%f58, %f12;
	mov.f32 	%f59, %f14;
	mov.f32 	%f60, %f16;
$LBB7__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	// Common exit: write the four selected floats into the 16-byte return slot.
	mov.f32 	%f61, %f57;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+0], %f61;
	mov.f32 	%f62, %f58;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+4], %f62;
	mov.f32 	%f63, %f59;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+8], %f63;
	mov.f32 	%f64, %f60;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi+12], %f64;
	ret;
$LDWend__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	} // _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec6018PixelRGBS_fi

	// ---------------------------------------------------------------------------
	// DarkerColor pixel function, kRGB32f_To_709YPbPr weights.
	//
	// Inputs (p1 = parmf1, p2 = parmf2; each is four f32 at +0/+4/+8/+12):
	//   parmf1 : 16-byte pixel p1
	//   parmf2 : 16-byte pixel p2
	//   parmf3 : f32 factor multiplied into p1[+12]
	//   parmf4 : s32, declared but never read in this body
	// Returns   : 16-byte pixel (four f32)
	//
	// Behaviour: a weighted sum of the first three components is formed for each
	// pixel and saturated by cvt.sat. If p1's sum is strictly less than p2's,
	// the result is a mix of p1 into p2 (formulas below); otherwise p2 is
	// returned unchanged.
	// NOTE(review): +12 looks like alpha, parmf3 like an opacity, and the weighted
	// sum like luma -- confirm against the CUDA source at .loc file 22, line 565.
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16]) _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi, .param .s32 __cudaparmf4__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi)
	{
	.reg .f32 %f<66>;
	.reg .pred %p<4>;
	.loc	22	565	0
$LDWbegin__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	// Unpack p1: +0 -> %f2, +4 -> %f4, +8 -> %f6, +12 -> %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack p2: +0 -> %f10, +4 -> %f12, +8 -> %f14, +12 -> %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// parmf3 -> %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	569	0
	// Weighted sums, T = kRGB32f_To_709YPbPr (first three floats):
	//   sum(p) = T[+8]*p[+0] + T[+0]*p[+8] + T[+4]*p[+4], then saturated.
	// Note the crossed pairing: component +0 uses weight +8 and vice versa.
	// NOTE(review): presumably the pixel is stored in reverse channel order
	// relative to the weight table -- confirm.
	ld.const.f32 	%f19, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f20, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f21, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f22, %f4, %f21;
	fma.rn.ftz.f32 	%f23, %f20, %f6, %f22;
	fma.rn.ftz.f32 	%f24, %f19, %f2, %f23;
	// %f25 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f12, %f21;
	fma.rn.ftz.f32 	%f27, %f20, %f14, %f26;
	fma.rn.ftz.f32 	%f28, %f19, %f10, %f27;
	// %f29 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f29, %f28;
	// "Darker" test: continue only when sum(p1) < sum(p2); otherwise return p2.
	setp.lt.ftz.f32 	%p1, %f25, %f29;
	@!%p1 bra 	$Lt_76_1282;
	.loc	22	468	0
	// a     = parmf3 * p1[+12]                 (%f30)
	// out12 = a + p2[+12] - a * p2[+12]        (%f33, copied to %f34)
	mul.ftz.f32 	%f30, %f18, %f8;
	add.ftz.f32 	%f31, %f30, %f16;
	mul.ftz.f32 	%f32, %f30, %f16;
	sub.ftz.f32 	%f33, %f31, %f32;
	mov.f32 	%f34, %f33;
	// If out12 - 8e-6 <= 0, treat the result as empty: all four outputs are 0.
	// This also keeps the rcp below away from a near-zero operand.
	mov.f32 	%f35, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f36, %f33, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f36, %f37;
	@!%p2 bra 	$Lt_76_2306;
	mov.f32 	%f38, 0f00000000;    	// 0
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	bra.uni 	$Lt_76_2050;
$Lt_76_2306:
	// w = a * (1 / out12) using the approximate reciprocal   (%f44)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f46 recomputes the same product as %f44; left exactly as the compiler emitted it.
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f16;
	rcp.approx.ftz.f32 	%f43, %f33;
	mul.ftz.f32 	%f44, %f30, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	mul.ftz.f32 	%f46, %f30, %f43;
	sub.ftz.f32 	%f47, %f45, %f46;
	// component +0 -> %f40
	mul.ftz.f32 	%f48, %f42, %f2;
	fma.rn.ftz.f32 	%f49, %f2, %f16, %f48;
	mul.ftz.f32 	%f50, %f44, %f49;
	fma.rn.ftz.f32 	%f40, %f10, %f47, %f50;
	// component +4 -> %f39
	mul.ftz.f32 	%f51, %f42, %f4;
	fma.rn.ftz.f32 	%f52, %f4, %f16, %f51;
	mul.ftz.f32 	%f53, %f44, %f52;
	fma.rn.ftz.f32 	%f39, %f12, %f47, %f53;
	// component +8 -> %f38
	mul.ftz.f32 	%f54, %f42, %f6;
	fma.rn.ftz.f32 	%f55, %f6, %f16, %f54;
	mul.ftz.f32 	%f56, %f44, %f55;
	fma.rn.ftz.f32 	%f38, %f14, %f47, %f56;
$Lt_76_2050:
	.loc	22	571	0
	// Result of the mixed (or zeroed) path: (+0, +4, +8, +12) = (%f40, %f39, %f38, %f34).
	mov.f32 	%f57, %f40;
	mov.f32 	%f58, %f39;
	mov.f32 	%f59, %f38;
	mov.f32 	%f60, %f34;
	bra.uni 	$LBB7__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi;
$Lt_76_1282:
	.loc	22	575	0
	// sum(p1) is not less than sum(p2): return p2 unchanged.
	mov.f32 	%f57, %f10;
	mov.f32 	%f58, %f12;
	mov.f32 	%f59, %f14;
	mov.f32 	%f60, %f16;
$LBB7__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	// Common exit: write the four selected floats into the 16-byte return slot.
	mov.f32 	%f61, %f57;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+0], %f61;
	mov.f32 	%f62, %f58;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+4], %f62;
	mov.f32 	%f63, %f59;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+8], %f63;
	mov.f32 	%f64, %f60;
	st.param.f32 	[__cudaretf__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi+12], %f64;
	ret;
$LDWend__Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	} // _Z66BlendMode_PixelRecFn_IR_BlendMode_DarkerColorIR_ColorCoding_Rec7098PixelRGBS_fi

	// ---------------------------------------------------------------------------
	// LighterColor pixel function, kRGB32f_To_601YPbPr weights.
	//
	// Inputs (p1 = parmf1, p2 = parmf2; each is four f32 at +0/+4/+8/+12):
	//   parmf1 : 16-byte pixel p1
	//   parmf2 : 16-byte pixel p2
	//   parmf3 : f32 factor multiplied into p1[+12]
	//   parmf4 : s32, declared but never read in this body
	// Returns   : 16-byte pixel (four f32)
	//
	// Behaviour: a weighted sum of the first three components is formed for each
	// pixel and saturated by cvt.sat. If p1's sum is strictly greater than p2's,
	// the result is a mix of p1 into p2 (formulas below); otherwise p2 is
	// returned unchanged.
	// NOTE(review): +12 looks like alpha, parmf3 like an opacity, and the weighted
	// sum like luma -- confirm against the CUDA source at .loc file 22, line 580.
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16]) _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi, .param .s32 __cudaparmf4__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi)
	{
	.reg .f32 %f<66>;
	.reg .pred %p<4>;
	.loc	22	580	0
$LDWbegin__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	// Unpack p1: +0 -> %f2, +4 -> %f4, +8 -> %f6, +12 -> %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack p2: +0 -> %f10, +4 -> %f12, +8 -> %f14, +12 -> %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// parmf3 -> %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	584	0
	// Weighted sums, T = kRGB32f_To_601YPbPr (first three floats):
	//   sum(p) = T[+8]*p[+0] + T[+0]*p[+8] + T[+4]*p[+4], then saturated.
	// Note the crossed pairing: component +0 uses weight +8 and vice versa.
	// NOTE(review): presumably the pixel is stored in reverse channel order
	// relative to the weight table -- confirm.
	ld.const.f32 	%f19, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f20, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f22, %f4, %f21;
	fma.rn.ftz.f32 	%f23, %f20, %f6, %f22;
	fma.rn.ftz.f32 	%f24, %f19, %f2, %f23;
	// %f25 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f12, %f21;
	fma.rn.ftz.f32 	%f27, %f20, %f14, %f26;
	fma.rn.ftz.f32 	%f28, %f19, %f10, %f27;
	// %f29 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f29, %f28;
	// "Lighter" test: continue only when sum(p1) > sum(p2); otherwise return p2.
	setp.gt.ftz.f32 	%p1, %f25, %f29;
	@!%p1 bra 	$Lt_77_1282;
	.loc	22	468	0
	// a     = parmf3 * p1[+12]                 (%f30)
	// out12 = a + p2[+12] - a * p2[+12]        (%f33, copied to %f34)
	mul.ftz.f32 	%f30, %f18, %f8;
	add.ftz.f32 	%f31, %f30, %f16;
	mul.ftz.f32 	%f32, %f30, %f16;
	sub.ftz.f32 	%f33, %f31, %f32;
	mov.f32 	%f34, %f33;
	// If out12 - 8e-6 <= 0, treat the result as empty: all four outputs are 0.
	// This also keeps the rcp below away from a near-zero operand.
	mov.f32 	%f35, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f36, %f33, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f36, %f37;
	@!%p2 bra 	$Lt_77_2306;
	mov.f32 	%f38, 0f00000000;    	// 0
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	bra.uni 	$Lt_77_2050;
$Lt_77_2306:
	// w = a * (1 / out12) using the approximate reciprocal   (%f44)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f46 recomputes the same product as %f44; left exactly as the compiler emitted it.
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f16;
	rcp.approx.ftz.f32 	%f43, %f33;
	mul.ftz.f32 	%f44, %f30, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	mul.ftz.f32 	%f46, %f30, %f43;
	sub.ftz.f32 	%f47, %f45, %f46;
	// component +0 -> %f40
	mul.ftz.f32 	%f48, %f42, %f2;
	fma.rn.ftz.f32 	%f49, %f2, %f16, %f48;
	mul.ftz.f32 	%f50, %f44, %f49;
	fma.rn.ftz.f32 	%f40, %f10, %f47, %f50;
	// component +4 -> %f39
	mul.ftz.f32 	%f51, %f42, %f4;
	fma.rn.ftz.f32 	%f52, %f4, %f16, %f51;
	mul.ftz.f32 	%f53, %f44, %f52;
	fma.rn.ftz.f32 	%f39, %f12, %f47, %f53;
	// component +8 -> %f38
	mul.ftz.f32 	%f54, %f42, %f6;
	fma.rn.ftz.f32 	%f55, %f6, %f16, %f54;
	mul.ftz.f32 	%f56, %f44, %f55;
	fma.rn.ftz.f32 	%f38, %f14, %f47, %f56;
$Lt_77_2050:
	.loc	22	586	0
	// Result of the mixed (or zeroed) path: (+0, +4, +8, +12) = (%f40, %f39, %f38, %f34).
	mov.f32 	%f57, %f40;
	mov.f32 	%f58, %f39;
	mov.f32 	%f59, %f38;
	mov.f32 	%f60, %f34;
	bra.uni 	$LBB7__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi;
$Lt_77_1282:
	.loc	22	590	0
	// sum(p1) is not greater than sum(p2): return p2 unchanged.
	mov.f32 	%f57, %f10;
	mov.f32 	%f58, %f12;
	mov.f32 	%f59, %f14;
	mov.f32 	%f60, %f16;
$LBB7__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	// Common exit: write the four selected floats into the 16-byte return slot.
	mov.f32 	%f61, %f57;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+0], %f61;
	mov.f32 	%f62, %f58;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+4], %f62;
	mov.f32 	%f63, %f59;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+8], %f63;
	mov.f32 	%f64, %f60;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi+12], %f64;
	ret;
$LDWend__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi:
	} // _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec6018PixelRGBS_fi

	// ---------------------------------------------------------------------------
	// LighterColor pixel function, kRGB32f_To_709YPbPr weights.
	//
	// Inputs (p1 = parmf1, p2 = parmf2; each is four f32 at +0/+4/+8/+12):
	//   parmf1 : 16-byte pixel p1
	//   parmf2 : 16-byte pixel p2
	//   parmf3 : f32 factor multiplied into p1[+12]
	//   parmf4 : s32, declared but never read in this body
	// Returns   : 16-byte pixel (four f32)
	//
	// Behaviour: a weighted sum of the first three components is formed for each
	// pixel and saturated by cvt.sat. If p1's sum is strictly greater than p2's,
	// the result is a mix of p1 into p2 (formulas below); otherwise p2 is
	// returned unchanged.
	// NOTE(review): +12 looks like alpha, parmf3 like an opacity, and the weighted
	// sum like luma -- confirm against the CUDA source at .loc file 22, line 594.
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16]) _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi, .param .s32 __cudaparmf4__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi)
	{
	.reg .f32 %f<66>;
	.reg .pred %p<4>;
	.loc	22	594	0
$LDWbegin__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	// Unpack p1: +0 -> %f2, +4 -> %f4, +8 -> %f6, +12 -> %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack p2: +0 -> %f10, +4 -> %f12, +8 -> %f14, +12 -> %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// parmf3 -> %f18.
	ld.param.f32 	%f17, [__cudaparmf3__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	.loc	22	598	0
	// Weighted sums, T = kRGB32f_To_709YPbPr (first three floats):
	//   sum(p) = T[+8]*p[+0] + T[+0]*p[+8] + T[+4]*p[+4], then saturated.
	// Note the crossed pairing: component +0 uses weight +8 and vice versa.
	// NOTE(review): presumably the pixel is stored in reverse channel order
	// relative to the weight table -- confirm.
	ld.const.f32 	%f19, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f20, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f21, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f22, %f4, %f21;
	fma.rn.ftz.f32 	%f23, %f20, %f6, %f22;
	fma.rn.ftz.f32 	%f24, %f19, %f2, %f23;
	// %f25 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f12, %f21;
	fma.rn.ftz.f32 	%f27, %f20, %f14, %f26;
	fma.rn.ftz.f32 	%f28, %f19, %f10, %f27;
	// %f29 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f29, %f28;
	// "Lighter" test: continue only when sum(p1) > sum(p2); otherwise return p2.
	setp.gt.ftz.f32 	%p1, %f25, %f29;
	@!%p1 bra 	$Lt_78_1282;
	.loc	22	468	0
	// a     = parmf3 * p1[+12]                 (%f30)
	// out12 = a + p2[+12] - a * p2[+12]        (%f33, copied to %f34)
	mul.ftz.f32 	%f30, %f18, %f8;
	add.ftz.f32 	%f31, %f30, %f16;
	mul.ftz.f32 	%f32, %f30, %f16;
	sub.ftz.f32 	%f33, %f31, %f32;
	mov.f32 	%f34, %f33;
	// If out12 - 8e-6 <= 0, treat the result as empty: all four outputs are 0.
	// This also keeps the rcp below away from a near-zero operand.
	mov.f32 	%f35, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f36, %f33, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f36, %f37;
	@!%p2 bra 	$Lt_78_2306;
	mov.f32 	%f38, 0f00000000;    	// 0
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f34, 0f00000000;    	// 0
	bra.uni 	$Lt_78_2050;
$Lt_78_2306:
	// w = a * (1 / out12) using the approximate reciprocal   (%f44)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f46 recomputes the same product as %f44; left exactly as the compiler emitted it.
	mov.f32 	%f41, 0f3f800000;    	// 1
	sub.ftz.f32 	%f42, %f41, %f16;
	rcp.approx.ftz.f32 	%f43, %f33;
	mul.ftz.f32 	%f44, %f30, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	mul.ftz.f32 	%f46, %f30, %f43;
	sub.ftz.f32 	%f47, %f45, %f46;
	// component +0 -> %f40
	mul.ftz.f32 	%f48, %f42, %f2;
	fma.rn.ftz.f32 	%f49, %f2, %f16, %f48;
	mul.ftz.f32 	%f50, %f44, %f49;
	fma.rn.ftz.f32 	%f40, %f10, %f47, %f50;
	// component +4 -> %f39
	mul.ftz.f32 	%f51, %f42, %f4;
	fma.rn.ftz.f32 	%f52, %f4, %f16, %f51;
	mul.ftz.f32 	%f53, %f44, %f52;
	fma.rn.ftz.f32 	%f39, %f12, %f47, %f53;
	// component +8 -> %f38
	mul.ftz.f32 	%f54, %f42, %f6;
	fma.rn.ftz.f32 	%f55, %f6, %f16, %f54;
	mul.ftz.f32 	%f56, %f44, %f55;
	fma.rn.ftz.f32 	%f38, %f14, %f47, %f56;
$Lt_78_2050:
	.loc	22	600	0
	// Result of the mixed (or zeroed) path: (+0, +4, +8, +12) = (%f40, %f39, %f38, %f34).
	mov.f32 	%f57, %f40;
	mov.f32 	%f58, %f39;
	mov.f32 	%f59, %f38;
	mov.f32 	%f60, %f34;
	bra.uni 	$LBB7__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi;
$Lt_78_1282:
	.loc	22	604	0
	// sum(p1) is not greater than sum(p2): return p2 unchanged.
	mov.f32 	%f57, %f10;
	mov.f32 	%f58, %f12;
	mov.f32 	%f59, %f14;
	mov.f32 	%f60, %f16;
$LBB7__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	// Common exit: write the four selected floats into the 16-byte return slot.
	mov.f32 	%f61, %f57;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+0], %f61;
	mov.f32 	%f62, %f58;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+4], %f62;
	mov.f32 	%f63, %f59;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+8], %f63;
	mov.f32 	%f64, %f60;
	st.param.f32 	[__cudaretf__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi+12], %f64;
	ret;
$LDWend__Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi:
	} // _Z67BlendMode_PixelRecFn_IR_BlendMode_LighterColorIR_ColorCoding_Rec7098PixelRGBS_fi

	// ---------------------------------------------------------------------------
	// DarkerColor pixel function: early-outs plus a choice between two weight tables.
	//
	// Inputs (p1 = parmf1, p2 = parmf2; each is four f32 at +0/+4/+8/+12):
	//   parmf1 : 16-byte pixel p1
	//   parmf2 : 16-byte pixel p2
	//   parmf3 : f32 factor multiplied into p1[+12]
	//   parmf4 : s32 selector; values > 720 pick the kRGB32f_To_709YPbPr weights,
	//            anything else picks kRGB32f_To_601YPbPr
	// Returns   : 16-byte pixel (four f32)
	//
	// Decision order, with a = parmf3 * p1[+12]:
	//   1. a - 8e-6 <= 0        -> return p2 unchanged
	//   2. p2[+12] - 8e-6 <= 0  -> return (p1[+0], p1[+4], p1[+8], a)
	//   3. otherwise            -> compare saturated weighted sums of p1 and p2;
	//                              if sum(p1) < sum(p2) mix p1 into p2, else return p2
	// The 601 and 709 branches are two inlined copies of the same arithmetic that
	// differ only in the constant table and register numbers.
	// NOTE(review): +12 looks like alpha, parmf3 like an opacity, and parmf4 like a
	// frame dimension used to guess SD vs HD colorimetry -- confirm with the caller.
	// ---------------------------------------------------------------------------
	.visible .func (.param .align 16 .b8 __cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16]) _Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi, .param .s32 __cudaparmf4__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi)
	{
	.reg .u32 %r<5>;
	.reg .f32 %f<117>;
	.reg .pred %p<9>;
	.loc	22	608	0
$LDWbegin__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi:
	// Unpack p1: +0 -> %f2, +4 -> %f4, +8 -> %f6, +12 -> %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Unpack p2: +0 -> %f10, +4 -> %f12, +8 -> %f14, +12 -> %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// parmf3 -> %f18, parmf4 -> %r2.
	ld.param.f32 	%f17, [__cudaparmf3__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	ld.param.u32 	%r1, [__cudaparmf4__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi];
	mov.s32 	%r2, %r1;
	// a = parmf3 * p1[+12] (%f19); shared by every path below.
	mul.ftz.f32 	%f19, %f18, %f8;
	// Early-out 1: a - 8e-6 <= 0 -> p1 contributes nothing, return p2 as is.
	mov.f32 	%f20, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f21, %f19, %f20;
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f21, %f22;
	@!%p1 bra 	$Lt_79_7426;
	mov.f32 	%f23, %f10;
	mov.f32 	%f24, %f12;
	mov.f32 	%f25, %f14;
	mov.f32 	%f26, %f16;
	bra.uni 	$Lt_79_7170;
$Lt_79_7426:
	// Early-out 2: p2[+12] - 8e-6 <= 0 -> return p1's first three components
	// with a in the +12 slot.
	mov.f32 	%f27, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f28, %f16, %f27;
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f28, %f29;
	@!%p2 bra 	$Lt_79_7938;
	mov.f32 	%f23, %f2;
	mov.f32 	%f24, %f4;
	mov.f32 	%f25, %f6;
	mov.f32 	%f26, %f19;
	bra.uni 	$Lt_79_7682;
$Lt_79_7938:
	// Table selection: signed compare of parmf4 against 720.
	// > 720 jumps to the 709 branch; <= 720 falls through to the 601 branch.
	mov.u32 	%r3, 720;
	setp.gt.s32 	%p3, %r2, %r3;
	@%p3 bra 	$Lt_79_8450;
	.loc	22	555	0
	// ---- 601 branch -------------------------------------------------------
	// sum(p) = T[+8]*p[+0] + T[+0]*p[+8] + T[+4]*p[+4], T = kRGB32f_To_601YPbPr,
	// then saturated. Component +0 is paired with weight +8 and vice versa.
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f33, %f4, %f32;
	fma.rn.ftz.f32 	%f34, %f31, %f6, %f33;
	fma.rn.ftz.f32 	%f35, %f30, %f2, %f34;
	// %f36 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f36, %f35;
	mul.ftz.f32 	%f37, %f12, %f32;
	fma.rn.ftz.f32 	%f38, %f31, %f14, %f37;
	fma.rn.ftz.f32 	%f39, %f30, %f10, %f38;
	// %f40 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f40, %f39;
	// Mix only when sum(p1) < sum(p2); otherwise keep p2.
	setp.lt.ftz.f32 	%p4, %f36, %f40;
	@!%p4 bra 	$Lt_79_5634;
	.loc	22	468	0
	// out12 = a + p2[+12] - a * p2[+12]   (%f43, copied to %f44)
	add.ftz.f32 	%f41, %f19, %f16;
	mul.ftz.f32 	%f42, %f19, %f16;
	sub.ftz.f32 	%f43, %f41, %f42;
	mov.f32 	%f44, %f43;
	// out12 - 8e-6 <= 0 -> all four outputs are 0 (also avoids rcp of ~0).
	mov.f32 	%f45, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f46, %f43, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p5, %f46, %f47;
	@!%p5 bra 	$Lt_79_8962;
	mov.f32 	%f48, 0f00000000;    	// 0
	mov.f32 	%f49, 0f00000000;    	// 0
	mov.f32 	%f50, 0f00000000;    	// 0
	mov.f32 	%f44, 0f00000000;    	// 0
	bra.uni 	$Lt_79_8706;
$Lt_79_8962:
	// w = a * (1 / out12) using the approximate reciprocal   (%f54)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f56 recomputes the same product as %f54; left as the compiler emitted it.
	mov.f32 	%f51, 0f3f800000;    	// 1
	sub.ftz.f32 	%f52, %f51, %f16;
	rcp.approx.ftz.f32 	%f53, %f43;
	mul.ftz.f32 	%f54, %f19, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	mul.ftz.f32 	%f56, %f19, %f53;
	sub.ftz.f32 	%f57, %f55, %f56;
	// component +0 -> %f50
	mul.ftz.f32 	%f58, %f52, %f2;
	fma.rn.ftz.f32 	%f59, %f2, %f16, %f58;
	mul.ftz.f32 	%f60, %f54, %f59;
	fma.rn.ftz.f32 	%f50, %f10, %f57, %f60;
	// component +4 -> %f49
	mul.ftz.f32 	%f61, %f52, %f4;
	fma.rn.ftz.f32 	%f62, %f4, %f16, %f61;
	mul.ftz.f32 	%f63, %f54, %f62;
	fma.rn.ftz.f32 	%f49, %f12, %f57, %f63;
	// component +8 -> %f48
	mul.ftz.f32 	%f64, %f52, %f6;
	fma.rn.ftz.f32 	%f65, %f6, %f16, %f64;
	mul.ftz.f32 	%f66, %f54, %f65;
	fma.rn.ftz.f32 	%f48, %f14, %f57, %f66;
$Lt_79_8706:
	.loc	22	557	0
	// 601 mixed (or zeroed) result: (+0, +4, +8, +12) = (%f50, %f49, %f48, %f44).
	mov.f32 	%f67, %f50;
	mov.f32 	%f68, %f49;
	mov.f32 	%f69, %f48;
	mov.f32 	%f70, %f44;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_256_3;
$Lt_79_5634:
	.loc	22	561	0
	// 601: sum(p1) is not less than sum(p2) -> keep p2.
	mov.f32 	%f67, %f10;
	mov.f32 	%f68, %f12;
	mov.f32 	%f69, %f14;
	mov.f32 	%f70, %f16;
$LDWendi__Z10GetLuma6018PixelRGB_256_3:
	.loc	22	608	0
	// End of the 601 branch: move its result into the common output registers.
	mov.f32 	%f23, %f67;
	mov.f32 	%f24, %f68;
	mov.f32 	%f25, %f69;
	mov.f32 	%f26, %f70;
	bra.uni 	$Lt_79_8194;
$Lt_79_8450:
	.loc	22	569	0
	// ---- 709 branch -------------------------------------------------------
	// Same arithmetic as the 601 branch with T = kRGB32f_To_709YPbPr.
	ld.const.f32 	%f71, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f72, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f73, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f74, %f4, %f73;
	fma.rn.ftz.f32 	%f75, %f72, %f6, %f74;
	fma.rn.ftz.f32 	%f76, %f71, %f2, %f75;
	// %f77 = saturated sum(p1)
	cvt.ftz.sat.f32.f32 	%f77, %f76;
	mul.ftz.f32 	%f78, %f12, %f73;
	fma.rn.ftz.f32 	%f79, %f72, %f14, %f78;
	fma.rn.ftz.f32 	%f80, %f71, %f10, %f79;
	// %f81 = saturated sum(p2)
	cvt.ftz.sat.f32.f32 	%f81, %f80;
	// Mix only when sum(p1) < sum(p2); otherwise keep p2.
	setp.lt.ftz.f32 	%p6, %f77, %f81;
	@!%p6 bra 	$Lt_79_6146;
	.loc	22	468	0
	// out12 = a + p2[+12] - a * p2[+12]   (%f84, copied to %f85)
	add.ftz.f32 	%f82, %f19, %f16;
	mul.ftz.f32 	%f83, %f19, %f16;
	sub.ftz.f32 	%f84, %f82, %f83;
	mov.f32 	%f85, %f84;
	// out12 - 8e-6 <= 0 -> all four outputs are 0 (also avoids rcp of ~0).
	mov.f32 	%f86, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f87, %f84, %f86;
	mov.f32 	%f88, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f87, %f88;
	@!%p7 bra 	$Lt_79_9474;
	mov.f32 	%f89, 0f00000000;    	// 0
	mov.f32 	%f90, 0f00000000;    	// 0
	mov.f32 	%f91, 0f00000000;    	// 0
	mov.f32 	%f85, 0f00000000;    	// 0
	bra.uni 	$Lt_79_9218;
$Lt_79_9474:
	// w = a * (1 / out12) using the approximate reciprocal   (%f95)
	// For each component c in {+0, +4, +8}:
	//   t      = (1 - p2[+12]) * p1[c] + p1[c] * p2[+12]     (algebraically p1[c])
	//   out[c] = p2[c] * (1 - w) + w * t
	// %f97 recomputes the same product as %f95; left as the compiler emitted it.
	mov.f32 	%f92, 0f3f800000;    	// 1
	sub.ftz.f32 	%f93, %f92, %f16;
	rcp.approx.ftz.f32 	%f94, %f84;
	mul.ftz.f32 	%f95, %f19, %f94;
	mov.f32 	%f96, 0f3f800000;    	// 1
	mul.ftz.f32 	%f97, %f19, %f94;
	sub.ftz.f32 	%f98, %f96, %f97;
	// component +0 -> %f91
	mul.ftz.f32 	%f99, %f93, %f2;
	fma.rn.ftz.f32 	%f100, %f2, %f16, %f99;
	mul.ftz.f32 	%f101, %f95, %f100;
	fma.rn.ftz.f32 	%f91, %f10, %f98, %f101;
	// component +4 -> %f90
	mul.ftz.f32 	%f102, %f93, %f4;
	fma.rn.ftz.f32 	%f103, %f4, %f16, %f102;
	mul.ftz.f32 	%f104, %f95, %f103;
	fma.rn.ftz.f32 	%f90, %f12, %f98, %f104;
	// component +8 -> %f89
	mul.ftz.f32 	%f105, %f93, %f6;
	fma.rn.ftz.f32 	%f106, %f6, %f16, %f105;
	mul.ftz.f32 	%f107, %f95, %f106;
	fma.rn.ftz.f32 	%f89, %f14, %f98, %f107;
$Lt_79_9218:
	.loc	22	571	0
	// 709 mixed (or zeroed) result: (+0, +4, +8, +12) = (%f91, %f90, %f89, %f85).
	mov.f32 	%f108, %f91;
	mov.f32 	%f109, %f90;
	mov.f32 	%f110, %f89;
	mov.f32 	%f111, %f85;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_256_1;
$Lt_79_6146:
	.loc	22	575	0
	// 709: sum(p1) is not less than sum(p2) -> keep p2.
	mov.f32 	%f108, %f10;
	mov.f32 	%f109, %f12;
	mov.f32 	%f110, %f14;
	mov.f32 	%f111, %f16;
$LDWendi__Z10GetLuma7098PixelRGB_256_1:
	.loc	22	608	0
	// End of the 709 branch: move its result into the common output registers.
	mov.f32 	%f23, %f108;
	mov.f32 	%f24, %f109;
	mov.f32 	%f25, %f110;
	mov.f32 	%f26, %f111;
$Lt_79_8194:
$Lt_79_7682:
$Lt_79_7170:
	// All paths join here with the result in %f23..%f26; write the return slot.
	mov.f32 	%f112, %f23;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+0], %f112;
	mov.f32 	%f113, %f24;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+4], %f113;
	mov.f32 	%f114, %f25;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+8], %f114;
	mov.f32 	%f115, %f26;
	st.param.f32 	[__cudaretf__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi+12], %f115;
	ret;
$LDWend__Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi:
	} // _Z42BlendMode_PixelFn_IR_BlendMode_DarkerColor8PixelRGBS_fi

	// BlendMode_PixelFn_IR_BlendMode_LighterColor(PixelRGB, PixelRGB, float, int)
	// (signature as encoded in the mangled name).
	//
	// Inputs:
	//   parmf1  16 bytes read as four f32 -> %f2, %f4, %f6, %f8   (offsets 0,4,8,12)
	//   parmf2  16 bytes read as four f32 -> %f10, %f12, %f14, %f16
	//   parmf3  f32 scale factor          -> %f18
	//   parmf4  s32 selector              -> %r2 (compared against 720 below)
	// Output:
	//   16 bytes written as four f32 from %f23..%f26.
	//
	// Outline of what the code does:
	//   w = parmf3 * parmf1[12]                                  (%f19)
	//   1. w - 8e-6 <= 0            -> return parmf2 unchanged
	//   2. parmf2[12] - 8e-6 <= 0   -> return (parmf1[0], parmf1[4], parmf1[8], w)
	//   3. otherwise compute a saturated weighted sum of the first three
	//      components of each pixel, using kRGB32f_To_601YPbPr when
	//      parmf4 <= 720 and kRGB32f_To_709YPbPr when parmf4 > 720.
	//      If parmf1's sum is strictly greater, return the composite of parmf1
	//      over parmf2; otherwise return parmf2 unchanged.
	// NOTE(review): presumably parmf1 = source pixel, parmf2 = destination pixel,
	// component [12] = alpha, parmf3 = opacity and parmf4 = frame height used to
	// pick Rec.601 vs Rec.709 luma - inferred from names only, confirm with the
	// CUDA source (file 22 in the .loc directives).
	.visible .func (.param .align 16 .b8 __cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16]) _Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi (.param .align 16 .b8 __cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16], .param .align 16 .b8 __cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi[16], .param .f32 __cudaparmf3__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi, .param .s32 __cudaparmf4__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi)
	{
	.reg .u32 %r<5>;
	.reg .f32 %f<117>;
	.reg .pred %p<9>;
	.loc	22	609	0
$LDWbegin__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi:
	// ---- Unpack parameters (each load is followed by a compiler-generated copy) ----
	// First pixel (parmf1) -> %f2, %f4, %f6, %f8.
	ld.param.f32 	%f1, [__cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+12];
	mov.f32 	%f8, %f7;
	// Second pixel (parmf2) -> %f10, %f12, %f14, %f16.
	ld.param.f32 	%f9, [__cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+0];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+4];
	mov.f32 	%f12, %f11;
	ld.param.f32 	%f13, [__cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+8];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf2__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+12];
	mov.f32 	%f16, %f15;
	// Scalar scale factor (parmf3) -> %f18, integer selector (parmf4) -> %r2.
	ld.param.f32 	%f17, [__cudaparmf3__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi];
	mov.f32 	%f18, %f17;
	ld.param.u32 	%r1, [__cudaparmf4__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi];
	mov.s32 	%r2, %r1;
	// %f19 = parmf3 * parmf1[12]; reused by every later path.
	mul.ftz.f32 	%f19, %f18, %f8;
	// Case 1: (%f19 - 8e-6) <= 0 -> result is the second pixel unchanged.
	mov.f32 	%f20, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f21, %f19, %f20;
	mov.f32 	%f22, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p1, %f21, %f22;
	@!%p1 bra 	$Lt_80_7426;
	mov.f32 	%f23, %f10;
	mov.f32 	%f24, %f12;
	mov.f32 	%f25, %f14;
	mov.f32 	%f26, %f16;
	bra.uni 	$Lt_80_7170;
$Lt_80_7426:
	// Case 2: (parmf2[12] - 8e-6) <= 0 -> result is the first pixel's first
	// three components with %f19 as the fourth.
	mov.f32 	%f27, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f28, %f16, %f27;
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p2, %f28, %f29;
	@!%p2 bra 	$Lt_80_7938;
	mov.f32 	%f23, %f2;
	mov.f32 	%f24, %f4;
	mov.f32 	%f25, %f6;
	mov.f32 	%f26, %f19;
	bra.uni 	$Lt_80_7682;
$Lt_80_7938:
	// Case 3: choose the coefficient table; signed compare, parmf4 > 720 takes
	// the kRGB32f_To_709YPbPr path at $Lt_80_8450.
	mov.u32 	%r3, 720;
	setp.gt.s32 	%p3, %r2, %r3;
	@%p3 bra 	$Lt_80_8450;
	.loc	22	584	0
	// ---- kRGB32f_To_601YPbPr path ----
	// Weighted sum: table[+8]*pix[0] + table[+0]*pix[8] + table[+4]*pix[4],
	// saturated to [0,1] by cvt.sat.
	// NOTE(review): the index reversal suggests the pixel is stored B,G,R,A
	// against R,G,B weights - confirm.
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+4];
	// %f36 = saturated weighted sum of the first pixel.
	mul.ftz.f32 	%f33, %f4, %f32;
	fma.rn.ftz.f32 	%f34, %f31, %f6, %f33;
	fma.rn.ftz.f32 	%f35, %f30, %f2, %f34;
	cvt.ftz.sat.f32.f32 	%f36, %f35;
	// %f40 = saturated weighted sum of the second pixel.
	mul.ftz.f32 	%f37, %f12, %f32;
	fma.rn.ftz.f32 	%f38, %f31, %f14, %f37;
	fma.rn.ftz.f32 	%f39, %f30, %f10, %f38;
	cvt.ftz.sat.f32.f32 	%f40, %f39;
	// First pixel wins only when strictly greater; otherwise keep the second.
	setp.gt.ftz.f32 	%p4, %f36, %f40;
	@!%p4 bra 	$Lt_80_5634;
	.loc	22	468	0
	// Composite the first pixel over the second.
	// %f43 = %f19 + parmf2[12] - %f19*parmf2[12]  (fourth output component).
	add.ftz.f32 	%f41, %f19, %f16;
	mul.ftz.f32 	%f42, %f19, %f16;
	sub.ftz.f32 	%f43, %f41, %f42;
	mov.f32 	%f44, %f43;
	// If (%f43 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f45, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f46, %f43, %f45;
	mov.f32 	%f47, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p5, %f46, %f47;
	@!%p5 bra 	$Lt_80_8962;
	mov.f32 	%f48, 0f00000000;    	// 0
	mov.f32 	%f49, 0f00000000;    	// 0
	mov.f32 	%f50, 0f00000000;    	// 0
	mov.f32 	%f44, 0f00000000;    	// 0
	bra.uni 	$Lt_80_8706;
$Lt_80_8962:
	// %f52 = 1 - parmf2[12]
	// %f54 = %f19 / %f43 (via approximate reciprocal), %f57 = 1 - %f54
	// Per channel c: out = pix2[c]*%f57 + %f54*(pix1[c]*parmf2[12] + %f52*pix1[c])
	mov.f32 	%f51, 0f3f800000;    	// 1
	sub.ftz.f32 	%f52, %f51, %f16;
	rcp.approx.ftz.f32 	%f53, %f43;
	mul.ftz.f32 	%f54, %f19, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	mul.ftz.f32 	%f56, %f19, %f53;
	sub.ftz.f32 	%f57, %f55, %f56;
	// Component at offset 0 -> %f50.
	mul.ftz.f32 	%f58, %f52, %f2;
	fma.rn.ftz.f32 	%f59, %f2, %f16, %f58;
	mul.ftz.f32 	%f60, %f54, %f59;
	fma.rn.ftz.f32 	%f50, %f10, %f57, %f60;
	// Component at offset 4 -> %f49.
	mul.ftz.f32 	%f61, %f52, %f4;
	fma.rn.ftz.f32 	%f62, %f4, %f16, %f61;
	mul.ftz.f32 	%f63, %f54, %f62;
	fma.rn.ftz.f32 	%f49, %f12, %f57, %f63;
	// Component at offset 8 -> %f48.
	mul.ftz.f32 	%f64, %f52, %f6;
	fma.rn.ftz.f32 	%f65, %f6, %f16, %f64;
	mul.ftz.f32 	%f66, %f54, %f65;
	fma.rn.ftz.f32 	%f48, %f14, %f57, %f66;
$Lt_80_8706:
	.loc	22	586	0
	// Composite result for the 601 path.
	mov.f32 	%f67, %f50;
	mov.f32 	%f68, %f49;
	mov.f32 	%f69, %f48;
	mov.f32 	%f70, %f44;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_257_3;
$Lt_80_5634:
	.loc	22	590	0
	// First pixel's sum was not greater: keep the second pixel.
	mov.f32 	%f67, %f10;
	mov.f32 	%f68, %f12;
	mov.f32 	%f69, %f14;
	mov.f32 	%f70, %f16;
$LDWendi__Z10GetLuma6018PixelRGB_257_3:
	.loc	22	609	0
	mov.f32 	%f23, %f67;
	mov.f32 	%f24, %f68;
	mov.f32 	%f25, %f69;
	mov.f32 	%f26, %f70;
	bra.uni 	$Lt_80_8194;
$Lt_80_8450:
	.loc	22	598	0
	// ---- kRGB32f_To_709YPbPr path ----
	// Same sequence as the 601 path above, reading the other coefficient table.
	ld.const.f32 	%f71, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f72, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f73, [kRGB32f_To_709YPbPr+4];
	// %f77 = saturated weighted sum of the first pixel.
	mul.ftz.f32 	%f74, %f4, %f73;
	fma.rn.ftz.f32 	%f75, %f72, %f6, %f74;
	fma.rn.ftz.f32 	%f76, %f71, %f2, %f75;
	cvt.ftz.sat.f32.f32 	%f77, %f76;
	// %f81 = saturated weighted sum of the second pixel.
	mul.ftz.f32 	%f78, %f12, %f73;
	fma.rn.ftz.f32 	%f79, %f72, %f14, %f78;
	fma.rn.ftz.f32 	%f80, %f71, %f10, %f79;
	cvt.ftz.sat.f32.f32 	%f81, %f80;
	setp.gt.ftz.f32 	%p6, %f77, %f81;
	@!%p6 bra 	$Lt_80_6146;
	.loc	22	468	0
	// Composite the first pixel over the second (same formula as above).
	// %f84 = %f19 + parmf2[12] - %f19*parmf2[12].
	add.ftz.f32 	%f82, %f19, %f16;
	mul.ftz.f32 	%f83, %f19, %f16;
	sub.ftz.f32 	%f84, %f82, %f83;
	mov.f32 	%f85, %f84;
	// If (%f84 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f86, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f87, %f84, %f86;
	mov.f32 	%f88, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f87, %f88;
	@!%p7 bra 	$Lt_80_9474;
	mov.f32 	%f89, 0f00000000;    	// 0
	mov.f32 	%f90, 0f00000000;    	// 0
	mov.f32 	%f91, 0f00000000;    	// 0
	mov.f32 	%f85, 0f00000000;    	// 0
	bra.uni 	$Lt_80_9218;
$Lt_80_9474:
	// %f93 = 1 - parmf2[12]; %f95 = %f19 / %f84; %f98 = 1 - %f95.
	mov.f32 	%f92, 0f3f800000;    	// 1
	sub.ftz.f32 	%f93, %f92, %f16;
	rcp.approx.ftz.f32 	%f94, %f84;
	mul.ftz.f32 	%f95, %f19, %f94;
	mov.f32 	%f96, 0f3f800000;    	// 1
	mul.ftz.f32 	%f97, %f19, %f94;
	sub.ftz.f32 	%f98, %f96, %f97;
	// Component at offset 0 -> %f91.
	mul.ftz.f32 	%f99, %f93, %f2;
	fma.rn.ftz.f32 	%f100, %f2, %f16, %f99;
	mul.ftz.f32 	%f101, %f95, %f100;
	fma.rn.ftz.f32 	%f91, %f10, %f98, %f101;
	// Component at offset 4 -> %f90.
	mul.ftz.f32 	%f102, %f93, %f4;
	fma.rn.ftz.f32 	%f103, %f4, %f16, %f102;
	mul.ftz.f32 	%f104, %f95, %f103;
	fma.rn.ftz.f32 	%f90, %f12, %f98, %f104;
	// Component at offset 8 -> %f89.
	mul.ftz.f32 	%f105, %f93, %f6;
	fma.rn.ftz.f32 	%f106, %f6, %f16, %f105;
	mul.ftz.f32 	%f107, %f95, %f106;
	fma.rn.ftz.f32 	%f89, %f14, %f98, %f107;
$Lt_80_9218:
	.loc	22	600	0
	// Composite result for the 709 path.
	mov.f32 	%f108, %f91;
	mov.f32 	%f109, %f90;
	mov.f32 	%f110, %f89;
	mov.f32 	%f111, %f85;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_257_1;
$Lt_80_6146:
	.loc	22	604	0
	// First pixel's sum was not greater: keep the second pixel.
	mov.f32 	%f108, %f10;
	mov.f32 	%f109, %f12;
	mov.f32 	%f110, %f14;
	mov.f32 	%f111, %f16;
$LDWendi__Z10GetLuma7098PixelRGB_257_1:
	.loc	22	609	0
	mov.f32 	%f23, %f108;
	mov.f32 	%f24, %f109;
	mov.f32 	%f25, %f110;
	mov.f32 	%f26, %f111;
$Lt_80_8194:
$Lt_80_7682:
$Lt_80_7170:
	// Common exit: every path above has set %f23..%f26; write them to the
	// 16-byte return parameter at offsets 0, 4, 8 and 12.
	mov.f32 	%f112, %f23;
	st.param.f32 	[__cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+0], %f112;
	mov.f32 	%f113, %f24;
	st.param.f32 	[__cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+4], %f113;
	mov.f32 	%f114, %f25;
	st.param.f32 	[__cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+8], %f114;
	mov.f32 	%f115, %f26;
	st.param.f32 	[__cudaretf__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi+12], %f115;
	ret;
$LDWend__Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi:
	} // _Z43BlendMode_PixelFn_IR_BlendMode_LighterColor8PixelRGBS_fi

	// distance(float, float, float2, float2) - signature per the mangled name.
	//
	// With a = (parmf1, parmf2), p = parmf3 and d = parmf4 (each float2 read as
	// two f32 at offsets 0 and 4), returns
	//     d[0] * (p[1] - a[1])  -  d[1] * (p[0] - a[0])
	// All arithmetic is single precision with flush-to-zero (.ftz).
	.visible .func (.param .f32 __cudaretf__Z8distanceff6float2S_) _Z8distanceff6float2S_ (.param .f32 __cudaparmf1__Z8distanceff6float2S_, .param .f32 __cudaparmf2__Z8distanceff6float2S_, .param .align 8 .b8 __cudaparmf3__Z8distanceff6float2S_[8], .param .align 8 .b8 __cudaparmf4__Z8distanceff6float2S_[8])
	{
	.reg .f32 %a_0, %a_1;                    // parmf1, parmf2
	.reg .f32 %p_0, %p_1;                    // parmf3 components
	.reg .f32 %d_0, %d_1;                    // parmf4 components
	.reg .f32 %delta_0, %delta_1;            // p - a, per component
	.reg .f32 %prod_lo, %prod_hi, %result;
	.loc	6	47	0
$LDWbegin__Z8distanceff6float2S_:
	// Load every argument straight into its working register.
	ld.param.f32 	%a_0, [__cudaparmf1__Z8distanceff6float2S_];
	ld.param.f32 	%a_1, [__cudaparmf2__Z8distanceff6float2S_];
	ld.param.f32 	%p_0, [__cudaparmf3__Z8distanceff6float2S_+0];
	ld.param.f32 	%p_1, [__cudaparmf3__Z8distanceff6float2S_+4];
	ld.param.f32 	%d_0, [__cudaparmf4__Z8distanceff6float2S_+0];
	ld.param.f32 	%d_1, [__cudaparmf4__Z8distanceff6float2S_+4];
	.loc	6	48	0
	sub.ftz.f32 	%delta_0, %p_0, %a_0;        // p[0] - a[0]
	mul.ftz.f32 	%prod_lo, %d_1, %delta_0;    // d[1] * (p[0] - a[0])
	sub.ftz.f32 	%delta_1, %p_1, %a_1;        // p[1] - a[1]
	mul.ftz.f32 	%prod_hi, %d_0, %delta_1;    // d[0] * (p[1] - a[1])
	sub.ftz.f32 	%result, %prod_hi, %prod_lo;
	st.param.f32 	[__cudaretf__Z8distanceff6float2S_], %result;
	ret;
$LDWend__Z8distanceff6float2S_:
	} // _Z8distanceff6float2S_

	// sum(float4, float4) - signature per the mangled name.
	//
	// Returns the component-wise sum parmf1[i] + parmf2[i] for the four f32
	// components at byte offsets 0, 4, 8 and 12 (flush-to-zero adds).
	.visible .func (.param .align 16 .b8 __cudaretf__Z3sum6float4S_[16]) _Z3sum6float4S_ (.param .align 16 .b8 __cudaparmf1__Z3sum6float4S_[16], .param .align 16 .b8 __cudaparmf2__Z3sum6float4S_[16])
	{
	.reg .f32 %lhs_0, %lhs_1, %lhs_2, %lhs_3;    // parmf1 components
	.reg .f32 %rhs_0, %rhs_1, %rhs_2, %rhs_3;    // parmf2 components
	.reg .f32 %tot_0, %tot_1, %tot_2, %tot_3;    // per-component sums
	.loc	6	52	0
$LDWbegin__Z3sum6float4S_:
	// First operand.
	ld.param.f32 	%lhs_0, [__cudaparmf1__Z3sum6float4S_+0];
	ld.param.f32 	%lhs_1, [__cudaparmf1__Z3sum6float4S_+4];
	ld.param.f32 	%lhs_2, [__cudaparmf1__Z3sum6float4S_+8];
	ld.param.f32 	%lhs_3, [__cudaparmf1__Z3sum6float4S_+12];
	// Second operand.
	ld.param.f32 	%rhs_0, [__cudaparmf2__Z3sum6float4S_+0];
	ld.param.f32 	%rhs_1, [__cudaparmf2__Z3sum6float4S_+4];
	ld.param.f32 	%rhs_2, [__cudaparmf2__Z3sum6float4S_+8];
	ld.param.f32 	%rhs_3, [__cudaparmf2__Z3sum6float4S_+12];
	.loc	6	53	0
	add.ftz.f32 	%tot_0, %lhs_0, %rhs_0;
	.loc	6	54	0
	add.ftz.f32 	%tot_1, %lhs_1, %rhs_1;
	.loc	6	55	0
	add.ftz.f32 	%tot_2, %lhs_2, %rhs_2;
	.loc	6	56	0
	add.ftz.f32 	%tot_3, %lhs_3, %rhs_3;
	.loc	6	57	0
	// Write the four sums to the return parameter.
	st.param.f32 	[__cudaretf__Z3sum6float4S_+0], %tot_0;
	st.param.f32 	[__cudaretf__Z3sum6float4S_+4], %tot_1;
	st.param.f32 	[__cudaretf__Z3sum6float4S_+8], %tot_2;
	st.param.f32 	[__cudaretf__Z3sum6float4S_+12], %tot_3;
	ret;
$LDWend__Z3sum6float4S_:
	} // _Z3sum6float4S_

	// weight(float4, float) - signature per the mangled name.
	//
	// Multiplies each of the four f32 components of parmf1 (byte offsets
	// 0, 4, 8, 12) by the scalar parmf2 and returns the scaled 16-byte value.
	.visible .func (.param .align 16 .b8 __cudaretf__Z6weight6float4f[16]) _Z6weight6float4f (.param .align 16 .b8 __cudaparmf1__Z6weight6float4f[16], .param .f32 __cudaparmf2__Z6weight6float4f)
	{
	.reg .f32 %vec_0, %vec_1, %vec_2, %vec_3;    // parmf1 components
	.reg .f32 %factor;                           // parmf2
	.reg .f32 %scl_0, %scl_1, %scl_2, %scl_3;    // scaled components
	.loc	6	61	0
$LDWbegin__Z6weight6float4f:
	ld.param.f32 	%vec_0, [__cudaparmf1__Z6weight6float4f+0];
	ld.param.f32 	%vec_1, [__cudaparmf1__Z6weight6float4f+4];
	ld.param.f32 	%vec_2, [__cudaparmf1__Z6weight6float4f+8];
	ld.param.f32 	%vec_3, [__cudaparmf1__Z6weight6float4f+12];
	ld.param.f32 	%factor, [__cudaparmf2__Z6weight6float4f];
	.loc	6	62	0
	mul.ftz.f32 	%scl_0, %vec_0, %factor;
	.loc	6	63	0
	mul.ftz.f32 	%scl_1, %factor, %vec_1;
	.loc	6	64	0
	mul.ftz.f32 	%scl_2, %factor, %vec_2;
	.loc	6	65	0
	mul.ftz.f32 	%scl_3, %factor, %vec_3;
	.loc	6	66	0
	// Write the scaled components to the return parameter.
	st.param.f32 	[__cudaretf__Z6weight6float4f+0], %scl_0;
	st.param.f32 	[__cudaretf__Z6weight6float4f+4], %scl_1;
	st.param.f32 	[__cudaretf__Z6weight6float4f+8], %scl_2;
	st.param.f32 	[__cudaretf__Z6weight6float4f+12], %scl_3;
	ret;
$LDWend__Z6weight6float4f:
	} // _Z6weight6float4f

	// normalize(float3) - signature per the mangled name.
	//
	// Scales the three f32 components of parmf1 by the approximate reciprocal
	// square root of the sum of their squares and returns the scaled triple.
	// There is no special handling for a zero sum of squares: the rsqrt result
	// is used as-is.
	.visible .func (.param .align 4 .b8 __cudaretf__Z9normalize6float3[12]) _Z9normalize6float3 (.param .align 4 .b8 __cudaparmf1__Z9normalize6float3[12])
	{
	.reg .f32 %in_0, %in_1, %in_2;           // parmf1 components
	.reg .f32 %sq_1, %sq_01, %sq_all;        // running sum of squares
	.reg .f32 %inv_len;                      // rsqrt of the sum of squares
	.reg .f32 %unit_0, %unit_1, %unit_2;     // scaled components
	.loc	6	70	0
$LDWbegin__Z9normalize6float3:
	ld.param.f32 	%in_0, [__cudaparmf1__Z9normalize6float3+0];
	ld.param.f32 	%in_1, [__cudaparmf1__Z9normalize6float3+4];
	ld.param.f32 	%in_2, [__cudaparmf1__Z9normalize6float3+8];
	.loc	6	71	0
	// Sum of squares, accumulated in the order [1], then [0], then [2].
	mul.ftz.f32 	%sq_1, %in_1, %in_1;
	fma.rn.ftz.f32 	%sq_01, %in_0, %in_0, %sq_1;
	fma.rn.ftz.f32 	%sq_all, %in_2, %in_2, %sq_01;
	rsqrt.approx.ftz.f32 	%inv_len, %sq_all;
	.loc	6	72	0
	mul.ftz.f32 	%unit_0, %inv_len, %in_0;
	.loc	6	73	0
	mul.ftz.f32 	%unit_1, %inv_len, %in_1;
	.loc	6	74	0
	mul.ftz.f32 	%unit_2, %inv_len, %in_2;
	.loc	6	75	0
	st.param.f32 	[__cudaretf__Z9normalize6float3+0], %unit_0;
	st.param.f32 	[__cudaretf__Z9normalize6float3+4], %unit_1;
	st.param.f32 	[__cudaretf__Z9normalize6float3+8], %unit_2;
	ret;
$LDWend__Z9normalize6float3:
	} // _Z9normalize6float3

	// vector(float3, float3) - signature per the mangled name.
	//
	// Returns the component-wise difference parmf1[i] - parmf2[i] for the
	// three f32 components at byte offsets 0, 4 and 8.
	.visible .func (.param .align 4 .b8 __cudaretf__Z6vector6float3S_[12]) _Z6vector6float3S_ (.param .align 4 .b8 __cudaparmf1__Z6vector6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z6vector6float3S_[12])
	{
	.reg .f32 %head_0, %head_1, %head_2;     // parmf1 components
	.reg .f32 %tail_0, %tail_1, %tail_2;     // parmf2 components
	.reg .f32 %diff_0, %diff_1, %diff_2;     // parmf1 - parmf2
	.loc	6	79	0
$LDWbegin__Z6vector6float3S_:
	ld.param.f32 	%head_0, [__cudaparmf1__Z6vector6float3S_+0];
	ld.param.f32 	%head_1, [__cudaparmf1__Z6vector6float3S_+4];
	ld.param.f32 	%head_2, [__cudaparmf1__Z6vector6float3S_+8];
	ld.param.f32 	%tail_0, [__cudaparmf2__Z6vector6float3S_+0];
	ld.param.f32 	%tail_1, [__cudaparmf2__Z6vector6float3S_+4];
	ld.param.f32 	%tail_2, [__cudaparmf2__Z6vector6float3S_+8];
	.loc	6	84	0
	// Subtract and store one component at a time.
	sub.ftz.f32 	%diff_0, %head_0, %tail_0;
	st.param.f32 	[__cudaretf__Z6vector6float3S_+0], %diff_0;
	sub.ftz.f32 	%diff_1, %head_1, %tail_1;
	st.param.f32 	[__cudaretf__Z6vector6float3S_+4], %diff_1;
	sub.ftz.f32 	%diff_2, %head_2, %tail_2;
	st.param.f32 	[__cudaretf__Z6vector6float3S_+8], %diff_2;
	ret;
$LDWend__Z6vector6float3S_:
	} // _Z6vector6float3S_

	// crossProduct(float3, float3) - signature per the mangled name.
	//
	// With u = parmf1 and v = parmf2 (three f32 each at offsets 0, 4, 8):
	//     ret[0] = u[1]*v[2] - u[2]*v[1]
	//     ret[1] = v[0]*u[2] - u[0]*v[2]
	//     ret[2] = u[0]*v[1] - v[0]*u[1]
	// Each product is a separate flush-to-zero multiply followed by a subtract.
	.visible .func (.param .align 4 .b8 __cudaretf__Z12crossProduct6float3S_[12]) _Z12crossProduct6float3S_ (.param .align 4 .b8 __cudaparmf1__Z12crossProduct6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z12crossProduct6float3S_[12])
	{
	.reg .f32 %u_0, %u_1, %u_2;              // parmf1 components
	.reg .f32 %v_0, %v_1, %v_2;              // parmf2 components
	.reg .f32 %neg_term, %pos_term;          // products for the current component
	.reg .f32 %cross_0, %cross_1, %cross_2;
	.loc	6	88	0
$LDWbegin__Z12crossProduct6float3S_:
	ld.param.f32 	%u_0, [__cudaparmf1__Z12crossProduct6float3S_+0];
	ld.param.f32 	%u_1, [__cudaparmf1__Z12crossProduct6float3S_+4];
	ld.param.f32 	%u_2, [__cudaparmf1__Z12crossProduct6float3S_+8];
	ld.param.f32 	%v_0, [__cudaparmf2__Z12crossProduct6float3S_+0];
	ld.param.f32 	%v_1, [__cudaparmf2__Z12crossProduct6float3S_+4];
	ld.param.f32 	%v_2, [__cudaparmf2__Z12crossProduct6float3S_+8];
	.loc	6	93	0
	// Component 0.
	mul.ftz.f32 	%neg_term, %u_2, %v_1;
	mul.ftz.f32 	%pos_term, %u_1, %v_2;
	sub.ftz.f32 	%cross_0, %pos_term, %neg_term;
	st.param.f32 	[__cudaretf__Z12crossProduct6float3S_+0], %cross_0;
	// Component 1.
	mul.ftz.f32 	%neg_term, %u_0, %v_2;
	mul.ftz.f32 	%pos_term, %v_0, %u_2;
	sub.ftz.f32 	%cross_1, %pos_term, %neg_term;
	st.param.f32 	[__cudaretf__Z12crossProduct6float3S_+4], %cross_1;
	// Component 2.
	mul.ftz.f32 	%neg_term, %v_0, %u_1;
	mul.ftz.f32 	%pos_term, %u_0, %v_1;
	sub.ftz.f32 	%cross_2, %pos_term, %neg_term;
	st.param.f32 	[__cudaretf__Z12crossProduct6float3S_+8], %cross_2;
	ret;
$LDWend__Z12crossProduct6float3S_:
	} // _Z12crossProduct6float3S_

	// innerProduct(float3, float3) - signature per the mangled name.
	//
	// Returns u[0]*v[0] + u[1]*v[1] + u[2]*v[2] for u = parmf1 and v = parmf2.
	// The [1] product is computed first with a plain multiply; the [0] and [2]
	// products are then folded in with fused multiply-adds, in that order.
	.visible .func (.param .f32 __cudaretf__Z12innerProduct6float3S_) _Z12innerProduct6float3S_ (.param .align 4 .b8 __cudaparmf1__Z12innerProduct6float3S_[12], .param .align 4 .b8 __cudaparmf2__Z12innerProduct6float3S_[12])
	{
	.reg .f32 %u_0, %u_1, %u_2;              // parmf1 components
	.reg .f32 %v_0, %v_1, %v_2;              // parmf2 components
	.reg .f32 %acc_1, %acc_01, %dot;         // running accumulation
	.loc	6	97	0
$LDWbegin__Z12innerProduct6float3S_:
	ld.param.f32 	%u_0, [__cudaparmf1__Z12innerProduct6float3S_+0];
	ld.param.f32 	%u_1, [__cudaparmf1__Z12innerProduct6float3S_+4];
	ld.param.f32 	%u_2, [__cudaparmf1__Z12innerProduct6float3S_+8];
	ld.param.f32 	%v_0, [__cudaparmf2__Z12innerProduct6float3S_+0];
	ld.param.f32 	%v_1, [__cudaparmf2__Z12innerProduct6float3S_+4];
	ld.param.f32 	%v_2, [__cudaparmf2__Z12innerProduct6float3S_+8];
	.loc	6	98	0
	mul.ftz.f32 	%acc_1, %u_1, %v_1;
	fma.rn.ftz.f32 	%acc_01, %u_0, %v_0, %acc_1;
	fma.rn.ftz.f32 	%dot, %u_2, %v_2, %acc_01;
	st.param.f32 	[__cudaretf__Z12innerProduct6float3S_], %dot;
	ret;
$LDWend__Z12innerProduct6float3S_:
	} // _Z12innerProduct6float3S_

	// TransformDestToSrc(float*, float*, int, int, int, int, float x 8)
	// (signature per the mangled name). No return value; writes one f32
	// through each of the two pointer arguments.
	//
	// Computation, naming arguments by position (argN = __cudaparmfN):
	//     cu = arg14 * (float(arg5) + arg7)
	//     cv =          float(arg6) + arg8
	//     if |arg13| >= 8e-6:
	//         s   = float(arg3) * cu / float(arg4)
	//         cu' = float(arg4) * (cv*sin(arg13) + cos(arg13)*s) / float(arg3)
	//         cv' = cv*cos(arg13) - sin(arg13)*s
	//     *arg1 = arg9  + arg11 * cu
	//     *arg2 = arg10 + arg12 * cv
	// Division and sin/cos use the .approx forms; all f32 ops are flush-to-zero.
	// NOTE(review): looks like a pixel-coordinate rotate/scale/translate with
	// arg3/arg4 acting as an aspect ratio and arg13 as an angle in radians -
	// inferred from the arithmetic only, confirm against the CUDA source.
	.visible .func _Z18TransformDestToSrcPfS_iiiiffffffff (.param .u64 __cudaparmf1__Z18TransformDestToSrcPfS_iiiiffffffff, .param .u64 __cudaparmf2__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf3__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf4__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf5__Z18TransformDestToSrcPfS_iiiiffffffff, .param .s32 __cudaparmf6__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf7__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf8__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf9__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf10__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf11__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf12__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf13__Z18TransformDestToSrcPfS_iiiiffffffff, .param .f32 __cudaparmf14__Z18TransformDestToSrcPfS_iiiiffffffff)
	{
	.reg .u64 %dst_first, %dst_second;               // arg1, arg2
	.reg .u32 %int_3, %int_4, %int_5, %int_6;        // arg3..arg6
	.reg .f32 %arg_7, %arg_8, %arg_9, %arg_10;
	.reg .f32 %arg_11, %arg_12, %arg_13, %arg_14;
	.reg .f32 %flt_3, %flt_4, %flt_5, %flt_6;        // arg3..arg6 converted to f32
	.reg .f32 %coord_u, %coord_v;                    // working coordinates, updated in place
	.reg .f32 %u_unscaled, %u_times_3, %u_ratio;
	.reg .f32 %abs_13, %eps, %cos_13, %sin_13;
	.reg .f32 %cos_part, %rot_sum, %rot_scaled, %sin_part, %v_cos;
	.reg .f32 %out_first, %out_second;
	.reg .pred %skip_rotation;
	.loc	6	119	0
$LDWbegin__Z18TransformDestToSrcPfS_iiiiffffffff:
	// Load all arguments directly into their working registers.
	ld.param.u64 	%dst_first, [__cudaparmf1__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.u64 	%dst_second, [__cudaparmf2__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.u32 	%int_3, [__cudaparmf3__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.u32 	%int_4, [__cudaparmf4__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.u32 	%int_5, [__cudaparmf5__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.u32 	%int_6, [__cudaparmf6__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_7, [__cudaparmf7__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_8, [__cudaparmf8__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_9, [__cudaparmf9__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_10, [__cudaparmf10__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_11, [__cudaparmf11__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_12, [__cudaparmf12__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_13, [__cudaparmf13__Z18TransformDestToSrcPfS_iiiiffffffff];
	ld.param.f32 	%arg_14, [__cudaparmf14__Z18TransformDestToSrcPfS_iiiiffffffff];
	.loc	6	122	0
	// coord_v = float(arg6) + arg8
	cvt.rn.f32.s32 	%flt_6, %int_6;
	add.ftz.f32 	%coord_v, %flt_6, %arg_8;
	.loc	6	124	0
	// coord_u = arg14 * (float(arg5) + arg7)
	cvt.rn.f32.s32 	%flt_5, %int_5;
	add.ftz.f32 	%u_unscaled, %flt_5, %arg_7;
	mul.ftz.f32 	%coord_u, %arg_14, %u_unscaled;
	// Skip the rotation entirely when |arg13| < 8e-6.
	abs.ftz.f32 	%abs_13, %arg_13;
	mov.f32 	%eps, 0f370637bd;    	// 8e-006
	setp.lt.ftz.f32 	%skip_rotation, %abs_13, %eps;
	@%skip_rotation bra 	$Lt_88_1026;
	.loc	27	529	0
	// u_ratio = float(arg3) * coord_u / float(arg4)
	cvt.rn.f32.s32 	%flt_3, %int_3;
	cvt.rn.f32.s32 	%flt_4, %int_4;
	mul.ftz.f32 	%u_times_3, %flt_3, %coord_u;
	div.approx.ftz.f32 	%u_ratio, %u_times_3, %flt_4;
	.loc	6	134	0
	// coord_u = float(arg4) * (coord_v*sin + cos*u_ratio) / float(arg3)
	cos.approx.ftz.f32 	%cos_13, %arg_13;
	sin.approx.ftz.f32 	%sin_13, %arg_13;
	mul.ftz.f32 	%cos_part, %cos_13, %u_ratio;
	fma.rn.ftz.f32 	%rot_sum, %coord_v, %sin_13, %cos_part;
	mul.ftz.f32 	%rot_scaled, %flt_4, %rot_sum;
	div.approx.ftz.f32 	%coord_u, %rot_scaled, %flt_3;
	.loc	6	135	0
	// coord_v = coord_v*cos - sin*u_ratio  (uses the pre-rotation coord_v)
	mul.ftz.f32 	%sin_part, %sin_13, %u_ratio;
	mul.ftz.f32 	%v_cos, %coord_v, %cos_13;
	sub.ftz.f32 	%coord_v, %v_cos, %sin_part;
$Lt_88_1026:
	.loc	6	139	0
	mul.ftz.f32 	%coord_u, %arg_11, %coord_u;
	.loc	6	140	0
	mul.ftz.f32 	%coord_v, %arg_12, %coord_v;
	.loc	6	143	0
	// *arg1 = arg9 + coord_u
	add.ftz.f32 	%out_first, %arg_9, %coord_u;
	st.f32 	[%dst_first+0], %out_first;
	.loc	6	144	0
	// *arg2 = arg10 + coord_v
	add.ftz.f32 	%out_second, %arg_10, %coord_v;
	st.f32 	[%dst_second+0], %out_second;
	.loc	6	145	0
	ret;
$LDWend__Z18TransformDestToSrcPfS_iiiiffffffff:
	} // _Z18TransformDestToSrcPfS_iiiiffffffff

	.visible .func _Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb (.param .align 16 .b8 __cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb[16], .param .u64 __cudaparmf2__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf3__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .u32 __cudaparmf4__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf5__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf6__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf7__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf8__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .f32 __cudaparmf9__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .u32 __cudaparmf10__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf11__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb, .param .s32 __cudaparmf12__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb)
	{
	.reg .u32 %r<121>;
	.reg .u64 %rd<13>;
	.reg .f32 %f<1759>;
	.reg .pred %p<153>;
	.loc	6	163	0
$LDWbegin__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb:
	ld.param.f32 	%f1, [__cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb+8];
	mov.f32 	%f6, %f5;
	ld.param.f32 	%f7, [__cudaparmf1__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb+12];
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, %f8;
	ld.param.u64 	%rd1, [__cudaparmf2__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s64 	%rd2, %rd1;
	ld.param.u32 	%r1, [__cudaparmf3__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r2, %r1;
	ld.param.u32 	%r3, [__cudaparmf4__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r4, %r3;
	ld.param.u32 	%r5, [__cudaparmf5__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r6, %r5;
	ld.param.u32 	%r7, [__cudaparmf6__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r8, %r7;
	ld.param.u32 	%r9, [__cudaparmf7__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r10, %r9;
	ld.param.f32 	%f10, [__cudaparmf9__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.f32 	%f11, %f10;
	ld.param.u32 	%r11, [__cudaparmf10__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	mov.s32 	%r12, %r11;
	ld.param.u32 	%r13, [__cudaparmf11__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	cvt.s8.s32 	%r14, %r13;
	ld.param.u32 	%r15, [__cudaparmf12__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb];
	cvt.s8.s32 	%r16, %r15;
	mov.s32 	%r17, 18;
	setp.eq.s32 	%p1, %r12, %r17;
	mov.s32 	%r18, 0;
	setp.eq.s32 	%p2, %r4, %r18;
	mul.lo.s32 	%r19, %r2, %r8;
	add.s32 	%r20, %r6, %r19;
	cvt.s64.s32 	%rd3, %r20;
	mov.u32 	%r21, 0;
	setp.eq.s32 	%p3, %r14, %r21;
	@%p3 bra 	$Lt_89_263682;
	@!%p2 bra 	$Lt_89_264194;
	.loc	21	115	0
	mul.lo.u64 	%rd4, %rd3, 8;
	add.u64 	%rd5, %rd2, %rd4;
	ld.v4.u16 	{%r22,%r23,%r24,%r25}, [%rd5+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r22;
	cvt.ftz.f32.f16	%f12, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r23;
	cvt.ftz.f32.f16	%f13, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f14, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f15, %b1; }
	bra.uni 	$Lt_89_263938;
$Lt_89_264194:
	mul.lo.u64 	%rd6, %rd3, 16;
	add.u64 	%rd7, %rd2, %rd6;
	ld.v4.f32 	{%f12,%f13,%f14,%f15}, [%rd7+0];
$Lt_89_263938:
	mov.s32 	%r26, 0;
	setp.ne.s32 	%p4, %r16, %r26;
	@!%p1 bra 	$Lt_89_264706;
	.loc	6	170	0
	mov.f32 	%f16, %f8;
	mov.u32 	%r27, 0;
	setp.ne.s32 	%p5, %r16, %r27;
	@%p5 bra 	$Lt_89_264962;
	.loc	22	69	0
	cvt.ftz.sat.f32.f32 	%f16, %f8;
$Lt_89_264962:
	.loc	22	72	0
	mul.ftz.f32 	%f16, %f11, %f16;
	@!%p4 bra 	$Lt_89_265730;
	.loc	22	77	0
	mul.ftz.f32 	%f17, %f11, %f2;
	.loc	22	78	0
	mul.ftz.f32 	%f18, %f11, %f4;
	.loc	22	79	0
	mul.ftz.f32 	%f19, %f11, %f6;
	bra.uni 	$Lt_89_265474;
$Lt_89_265730:
	.loc	5	255	0
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p6, %f2, %f20;
	@!%p6 bra 	$Lt_89_265986;
	.loc	5	234	0
	neg.ftz.f32 	%f21, %f2;
	lg2.approx.ftz.f32 	%f22, %f21;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f25, %f24;
	neg.ftz.f32 	%f26, %f25;
	bra.uni 	$LDWendi___log2f_266_80;
$Lt_89_265986:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f27, %f2;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f26, %f29;
$LDWendi___log2f_266_80:
	.loc	5	256	0
	mov.f32 	%f30, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p7, %f4, %f30;
	@!%p7 bra 	$Lt_89_266498;
	.loc	5	234	0
	neg.ftz.f32 	%f31, %f4;
	lg2.approx.ftz.f32 	%f32, %f31;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f35, %f34;
	neg.ftz.f32 	%f36, %f35;
	bra.uni 	$LDWendi___log2f_266_78;
$Lt_89_266498:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f37, %f4;
	mov.f32 	%f38, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f39, %f37, %f38;
	ex2.approx.ftz.f32 	%f36, %f39;
$LDWendi___log2f_266_78:
	.loc	5	257	0
	mov.f32 	%f40, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f6, %f40;
	@!%p8 bra 	$Lt_89_267010;
	.loc	5	234	0
	neg.ftz.f32 	%f41, %f6;
	lg2.approx.ftz.f32 	%f42, %f41;
	mov.f32 	%f43, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f44, %f42, %f43;
	ex2.approx.ftz.f32 	%f45, %f44;
	neg.ftz.f32 	%f46, %f45;
	bra.uni 	$LDWendi___log2f_266_76;
$Lt_89_267010:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f47, %f6;
	mov.f32 	%f48, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f49, %f47, %f48;
	ex2.approx.ftz.f32 	%f46, %f49;
$LDWendi___log2f_266_76:
	.loc	22	83	0
	cvt.ftz.sat.f32.f32 	%f50, %f16;
	mul.ftz.f32 	%f17, %f26, %f50;
	mul.ftz.f32 	%f18, %f36, %f50;
	mul.ftz.f32 	%f19, %f46, %f50;
	mov.f32 	%f16, %f50;
$Lt_89_265474:
	cvt.ftz.sat.f32.f32 	%f51, %f15;
	mov.f32 	%f52, 0f3f800000;    	// 1
	sub.ftz.f32 	%f53, %f52, %f16;
	mul.ftz.f32 	%f54, %f51, %f53;
	add.ftz.f32 	%f55, %f54, %f16;
	mov.f32 	%f56, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f57, %f55, %f56;
	mov.f32 	%f58, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p9, %f57, %f58;
	@!%p9 bra 	$Lt_89_267778;
	mov.f32 	%f59, 0f00000000;    	// 0
	mov.f32 	%f60, 0f00000000;    	// 0
	mov.f32 	%f61, 0f00000000;    	// 0
	mov.f32 	%f62, 0f00000000;    	// 0
	bra.uni 	$Lt_89_267522;
$Lt_89_267778:
	mov.f32 	%f63, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p10, %f12, %f63;
	@!%p10 bra 	$Lt_89_268034;
	.loc	5	234	0
	neg.ftz.f32 	%f64, %f12;
	lg2.approx.ftz.f32 	%f65, %f64;
	mov.f32 	%f66, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f67, %f65, %f66;
	ex2.approx.ftz.f32 	%f68, %f67;
	neg.ftz.f32 	%f69, %f68;
	bra.uni 	$LDWendi___log2f_266_74;
$Lt_89_268034:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f70, %f12;
	mov.f32 	%f71, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f72, %f70, %f71;
	ex2.approx.ftz.f32 	%f69, %f72;
$LDWendi___log2f_266_74:
	.loc	22	97	0
	mov.f32 	%f73, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p11, %f13, %f73;
	@!%p11 bra 	$Lt_89_268546;
	.loc	5	234	0
	neg.ftz.f32 	%f74, %f13;
	lg2.approx.ftz.f32 	%f75, %f74;
	mov.f32 	%f76, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f77, %f75, %f76;
	ex2.approx.ftz.f32 	%f78, %f77;
	neg.ftz.f32 	%f79, %f78;
	bra.uni 	$LDWendi___log2f_266_72;
$Lt_89_268546:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f80, %f13;
	mov.f32 	%f81, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f82, %f80, %f81;
	ex2.approx.ftz.f32 	%f79, %f82;
$LDWendi___log2f_266_72:
	.loc	22	98	0
	mov.f32 	%f83, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p12, %f14, %f83;
	@!%p12 bra 	$Lt_89_269058;
	.loc	5	234	0
	neg.ftz.f32 	%f84, %f14;
	lg2.approx.ftz.f32 	%f85, %f84;
	mov.f32 	%f86, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f87, %f85, %f86;
	ex2.approx.ftz.f32 	%f88, %f87;
	neg.ftz.f32 	%f89, %f88;
	bra.uni 	$LDWendi___log2f_266_70;
$Lt_89_269058:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f90, %f14;
	mov.f32 	%f91, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f92, %f90, %f91;
	ex2.approx.ftz.f32 	%f89, %f92;
$LDWendi___log2f_266_70:
	.loc	5	208	0
	cvt.ftz.sat.f32.f32 	%f93, %f55;
	mov.f32 	%f94, %f93;
	mov.f32 	%f95, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f96, %f93, %f95;
	mov.f32 	%f97, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p13, %f96, %f97;
	@%p13 bra 	$Lt_89_269826;
	.loc	5	213	0
	rcp.approx.ftz.f32 	%f98, %f93;
	fma.rn.ftz.f32 	%f99, %f54, %f89, %f19;
	mul.ftz.f32 	%f100, %f98, %f99;
	.loc	5	214	0
	fma.rn.ftz.f32 	%f101, %f54, %f79, %f18;
	mul.ftz.f32 	%f102, %f98, %f101;
	.loc	5	215	0
	fma.rn.ftz.f32 	%f103, %f54, %f69, %f17;
	mul.ftz.f32 	%f104, %f98, %f103;
	bra.uni 	$Lt_89_269570;
$Lt_89_269826:
	.loc	5	219	0
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f102, 0f00000000;   	// 0
	mov.f32 	%f104, 0f00000000;   	// 0
	mov.f32 	%f94, 0f00000000;    	// 0
$Lt_89_269570:
	.loc	5	266	0
	mov.f32 	%f105, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p14, %f104, %f105;
	@!%p14 bra 	$Lt_89_270082;
	.loc	5	242	0
	neg.ftz.f32 	%f106, %f104;
	lg2.approx.ftz.f32 	%f107, %f106;
	mov.f32 	%f108, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f109, %f107, %f108;
	ex2.approx.ftz.f32 	%f110, %f109;
	neg.ftz.f32 	%f111, %f110;
	bra.uni 	$LDWendi___log2f_266_68;
$Lt_89_270082:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f112, %f104;
	mov.f32 	%f113, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f114, %f112, %f113;
	ex2.approx.ftz.f32 	%f111, %f114;
$LDWendi___log2f_266_68:
	.loc	5	267	0
	mov.f32 	%f115, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p15, %f102, %f115;
	@!%p15 bra 	$Lt_89_270594;
	.loc	5	242	0
	neg.ftz.f32 	%f116, %f102;
	lg2.approx.ftz.f32 	%f117, %f116;
	mov.f32 	%f118, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f119, %f117, %f118;
	ex2.approx.ftz.f32 	%f120, %f119;
	neg.ftz.f32 	%f121, %f120;
	bra.uni 	$LDWendi___log2f_266_66;
$Lt_89_270594:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f122, %f102;
	mov.f32 	%f123, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f124, %f122, %f123;
	ex2.approx.ftz.f32 	%f121, %f124;
$LDWendi___log2f_266_66:
	.loc	5	268	0
	mov.f32 	%f125, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p16, %f100, %f125;
	@!%p16 bra 	$Lt_89_271106;
	.loc	5	242	0
	neg.ftz.f32 	%f126, %f100;
	lg2.approx.ftz.f32 	%f127, %f126;
	mov.f32 	%f128, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f129, %f127, %f128;
	ex2.approx.ftz.f32 	%f130, %f129;
	neg.ftz.f32 	%f131, %f130;
	bra.uni 	$LDWendi___log2f_266_64;
$Lt_89_271106:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f132, %f100;
	mov.f32 	%f133, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f134, %f132, %f133;
	ex2.approx.ftz.f32 	%f131, %f134;
$LDWendi___log2f_266_64:
	.loc	22	101	0
	mov.f32 	%f62, %f111;
	mov.f32 	%f61, %f121;
	mov.f32 	%f60, %f131;
	mov.f32 	%f59, %f94;
$Lt_89_267522:
	.loc	6	170	0
	mov.f32 	%f2, %f62;
	mov.f32 	%f4, %f61;
	mov.f32 	%f6, %f60;
	mov.f32 	%f9, %f59;
	bra.uni 	$Lt_89_311554;
$Lt_89_264706:
	@!%p4 bra 	$Lt_89_272130;
	mov.f32 	%f135, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f136, %f8, %f135;
	mov.f32 	%f137, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p17, %f136, %f137;
	@!%p17 bra 	$Lt_89_272386;
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f6, 0f00000000;     	// 0
	mov.f32 	%f4, 0f00000000;     	// 0
	mov.f32 	%f2, 0f00000000;     	// 0
	bra.uni 	$Lt_89_272130;
$Lt_89_272386:
	.loc	6	183	0
	rcp.approx.ftz.f32 	%f138, %f8;
	mul.ftz.f32 	%f139, %f138, %f2;
	mov.f32 	%f140, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p18, %f139, %f140;
	@!%p18 bra 	$Lt_89_272642;
	.loc	5	242	0
	neg.ftz.f32 	%f141, %f139;
	lg2.approx.ftz.f32 	%f142, %f141;
	mov.f32 	%f143, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f144, %f142, %f143;
	ex2.approx.ftz.f32 	%f145, %f144;
	neg.ftz.f32 	%f146, %f145;
	bra.uni 	$LDWendi___log2f_266_62;
$Lt_89_272642:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f147, %f139;
	mov.f32 	%f148, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f149, %f147, %f148;
	ex2.approx.ftz.f32 	%f146, %f149;
$LDWendi___log2f_266_62:
	.loc	6	183	0
	mov.f32 	%f2, %f146;
	.loc	6	184	0
	mul.ftz.f32 	%f150, %f138, %f4;
	mov.f32 	%f151, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p19, %f150, %f151;
	@!%p19 bra 	$Lt_89_273154;
	.loc	5	242	0
	neg.ftz.f32 	%f152, %f150;
	lg2.approx.ftz.f32 	%f153, %f152;
	mov.f32 	%f154, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f155, %f153, %f154;
	ex2.approx.ftz.f32 	%f156, %f155;
	neg.ftz.f32 	%f157, %f156;
	bra.uni 	$LDWendi___log2f_266_60;
$Lt_89_273154:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f158, %f150;
	mov.f32 	%f159, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f160, %f158, %f159;
	ex2.approx.ftz.f32 	%f157, %f160;
$LDWendi___log2f_266_60:
	.loc	6	184	0
	mov.f32 	%f4, %f157;
	.loc	6	185	0
	mul.ftz.f32 	%f161, %f138, %f6;
	mov.f32 	%f162, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p20, %f161, %f162;
	@!%p20 bra 	$Lt_89_273666;
	.loc	5	242	0
	neg.ftz.f32 	%f163, %f161;
	lg2.approx.ftz.f32 	%f164, %f163;
	mov.f32 	%f165, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f166, %f164, %f165;
	ex2.approx.ftz.f32 	%f167, %f166;
	neg.ftz.f32 	%f168, %f167;
	bra.uni 	$LDWendi___log2f_266_58;
$Lt_89_273666:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f169, %f161;
	mov.f32 	%f170, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f171, %f169, %f170;
	ex2.approx.ftz.f32 	%f168, %f171;
$LDWendi___log2f_266_58:
	.loc	6	185	0
	mov.f32 	%f6, %f168;
$Lt_89_272130:
$Lt_89_271618:
	// Switch on %r12 (source file 6, line 189), lowered to a linear ladder of
	// "load constant / setp.eq / conditional branch" triples. The first
	// matching constant wins; if none matches, control jumps straight to
	// $Lt_89_311554 with %f2/%f4/%f6/%f9 untouched by any handler.
	//
	//   %r12  handler          %r12  handler          %r12  handler
	//     3   $Lt_89_258         14  $Lt_89_2818        9   $Lt_89_4866
	//    11   $Lt_89_770         12  $Lt_89_3074        5   $Lt_89_5122
	//    17   $Lt_89_1026        19  $Lt_89_3330        7   $Lt_89_5378
	//    22   $Lt_89_1282        23  $Lt_89_3586       25   $Lt_89_5634
	//     6   $Lt_89_1538         8  $Lt_89_3842       26   $Lt_89_5890
	//     1   $Lt_89_1794        24  $Lt_89_4098       10   $Lt_89_6146
	//    13   $Lt_89_2050        15  $Lt_89_4354       21   $Lt_89_6402
	//     4   $Lt_89_2306        20  $Lt_89_4610        0   $Lt_89_6658
	//     2   $Lt_89_2562                              16   $Lt_89_6914
	//
	// NOTE(review): %r12 is set before this chunk; the handlers look like
	// per-pixel compositing formulas, so it is presumably a blend-mode index
	// -- confirm against the code that loads it.
	.loc	6	189	0
	mov.u32 	%r28, 3;
	setp.eq.s32 	%p21, %r12, %r28;
	@%p21 bra 	$Lt_89_258;
	mov.u32 	%r29, 11;
	setp.eq.s32 	%p22, %r12, %r29;
	@%p22 bra 	$Lt_89_770;
	mov.u32 	%r30, 17;
	setp.eq.s32 	%p23, %r12, %r30;
	@%p23 bra 	$Lt_89_1026;
	mov.u32 	%r31, 22;
	setp.eq.s32 	%p24, %r12, %r31;
	@%p24 bra 	$Lt_89_1282;
	mov.u32 	%r32, 6;
	setp.eq.s32 	%p25, %r12, %r32;
	@%p25 bra 	$Lt_89_1538;
	mov.u32 	%r33, 1;
	setp.eq.s32 	%p26, %r12, %r33;
	@%p26 bra 	$Lt_89_1794;
	mov.u32 	%r34, 13;
	setp.eq.s32 	%p27, %r12, %r34;
	@%p27 bra 	$Lt_89_2050;
	mov.u32 	%r35, 4;
	setp.eq.s32 	%p28, %r12, %r35;
	@%p28 bra 	$Lt_89_2306;
	mov.u32 	%r36, 2;
	setp.eq.s32 	%p29, %r12, %r36;
	@%p29 bra 	$Lt_89_2562;
	mov.u32 	%r37, 14;
	setp.eq.s32 	%p30, %r12, %r37;
	@%p30 bra 	$Lt_89_2818;
	mov.u32 	%r38, 12;
	setp.eq.s32 	%p31, %r12, %r38;
	@%p31 bra 	$Lt_89_3074;
	mov.u32 	%r39, 19;
	setp.eq.s32 	%p32, %r12, %r39;
	@%p32 bra 	$Lt_89_3330;
	mov.u32 	%r40, 23;
	setp.eq.s32 	%p33, %r12, %r40;
	@%p33 bra 	$Lt_89_3586;
	mov.u32 	%r41, 8;
	setp.eq.s32 	%p34, %r12, %r41;
	@%p34 bra 	$Lt_89_3842;
	mov.u32 	%r42, 24;
	setp.eq.s32 	%p35, %r12, %r42;
	@%p35 bra 	$Lt_89_4098;
	mov.u32 	%r43, 15;
	setp.eq.s32 	%p36, %r12, %r43;
	@%p36 bra 	$Lt_89_4354;
	mov.u32 	%r44, 20;
	setp.eq.s32 	%p37, %r12, %r44;
	@%p37 bra 	$Lt_89_4610;
	mov.u32 	%r45, 9;
	setp.eq.s32 	%p38, %r12, %r45;
	@%p38 bra 	$Lt_89_4866;
	mov.u32 	%r46, 5;
	setp.eq.s32 	%p39, %r12, %r46;
	@%p39 bra 	$Lt_89_5122;
	mov.u32 	%r47, 7;
	setp.eq.s32 	%p40, %r12, %r47;
	@%p40 bra 	$Lt_89_5378;
	mov.u32 	%r48, 25;
	setp.eq.s32 	%p41, %r12, %r48;
	@%p41 bra 	$Lt_89_5634;
	mov.u32 	%r49, 26;
	setp.eq.s32 	%p42, %r12, %r49;
	@%p42 bra 	$Lt_89_5890;
	mov.u32 	%r50, 10;
	setp.eq.s32 	%p43, %r12, %r50;
	@%p43 bra 	$Lt_89_6146;
	mov.u32 	%r51, 21;
	setp.eq.s32 	%p44, %r12, %r51;
	@%p44 bra 	$Lt_89_6402;
	mov.u32 	%r52, 0;
	setp.eq.s32 	%p45, %r12, %r52;
	@%p45 bra 	$Lt_89_6658;
	mov.u32 	%r53, 16;
	setp.eq.s32 	%p46, %r12, %r53;
	@%p46 bra 	$Lt_89_6914;
	// No case matched: skip every handler.
	bra.uni 	$Lt_89_311554;
$Lt_89_258:
	// Handler for %r12 == 3 (source file 22, line 469).
	// With a = %f11 * %f9 and b = %f15:
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = min(D, S)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "darken" composite with %f15 the backdrop
	// alpha and %f11 an opacity factor -- the inputs are set before this
	// chunk, confirm there.
	.loc	22	469	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f174, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f174;
	mov.f32 	%f176, %f175;
	mov.f32 	%f177, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f178, %f175, %f177;
	mov.f32 	%f179, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p47, %f178, %f179;
	@!%p47 bra 	$Lt_89_274434;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f180, 0f00000000;   	// 0
	mov.f32 	%f181, 0f00000000;   	// 0
	mov.f32 	%f182, 0f00000000;   	// 0
	mov.f32 	%f176, 0f00000000;   	// 0
	bra.uni 	$Lt_89_274178;
$Lt_89_274434:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f183, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f183, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f187, 0f3f800000;   	// 1
	mul.ftz.f32 	%f188, %f172, %f185;
	sub.ftz.f32 	%f189, %f187, %f188;
	// Channel (%f12, %f2) -> %f182
	min.ftz.f32 	%f190, %f12, %f2;
	mul.ftz.f32 	%f191, %f15, %f190;
	fma.rn.ftz.f32 	%f192, %f2, %f184, %f191;
	mul.ftz.f32 	%f193, %f186, %f192;
	fma.rn.ftz.f32 	%f182, %f12, %f189, %f193;
	// Channel (%f13, %f4) -> %f181
	min.ftz.f32 	%f194, %f13, %f4;
	mul.ftz.f32 	%f195, %f15, %f194;
	fma.rn.ftz.f32 	%f196, %f4, %f184, %f195;
	mul.ftz.f32 	%f197, %f186, %f196;
	fma.rn.ftz.f32 	%f181, %f13, %f189, %f197;
	// Channel (%f14, %f6) -> %f180
	min.ftz.f32 	%f198, %f14, %f6;
	mul.ftz.f32 	%f199, %f15, %f198;
	fma.rn.ftz.f32 	%f200, %f6, %f184, %f199;
	mul.ftz.f32 	%f201, %f186, %f200;
	fma.rn.ftz.f32 	%f180, %f14, %f189, %f201;
$Lt_89_274178:
	.loc	6	191	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f182;
	mov.f32 	%f4, %f181;
	mov.f32 	%f6, %f180;
	mov.f32 	%f9, %f176;
	bra.uni 	$Lt_89_311554;
$Lt_89_770:
	// Handler for %r12 == 11 (source file 22, line 470).
	// With a = %f11 * %f9 and b = %f15:
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = max(D, S)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "lighten" composite -- the inputs are set
	// before this chunk, confirm there.
	.loc	22	470	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f202, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f202;
	mov.f32 	%f203, %f175;
	mov.f32 	%f204, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f205, %f175, %f204;
	mov.f32 	%f206, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p48, %f205, %f206;
	@!%p48 bra 	$Lt_89_274946;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f207, 0f00000000;   	// 0
	mov.f32 	%f208, 0f00000000;   	// 0
	mov.f32 	%f209, 0f00000000;   	// 0
	mov.f32 	%f203, 0f00000000;   	// 0
	bra.uni 	$Lt_89_274690;
$Lt_89_274946:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f210, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f210, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f211, 0f3f800000;   	// 1
	mul.ftz.f32 	%f212, %f172, %f185;
	sub.ftz.f32 	%f189, %f211, %f212;
	// Channel (%f12, %f2) -> %f209
	max.ftz.f32 	%f213, %f12, %f2;
	mul.ftz.f32 	%f214, %f15, %f213;
	fma.rn.ftz.f32 	%f215, %f2, %f184, %f214;
	mul.ftz.f32 	%f216, %f186, %f215;
	fma.rn.ftz.f32 	%f209, %f12, %f189, %f216;
	// Channel (%f13, %f4) -> %f208
	max.ftz.f32 	%f217, %f13, %f4;
	mul.ftz.f32 	%f218, %f15, %f217;
	fma.rn.ftz.f32 	%f219, %f4, %f184, %f218;
	mul.ftz.f32 	%f220, %f186, %f219;
	fma.rn.ftz.f32 	%f208, %f13, %f189, %f220;
	// Channel (%f14, %f6) -> %f207
	max.ftz.f32 	%f221, %f14, %f6;
	mul.ftz.f32 	%f222, %f15, %f221;
	fma.rn.ftz.f32 	%f223, %f6, %f184, %f222;
	mul.ftz.f32 	%f224, %f186, %f223;
	fma.rn.ftz.f32 	%f207, %f14, %f189, %f224;
$Lt_89_274690:
	.loc	6	192	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f209;
	mov.f32 	%f4, %f208;
	mov.f32 	%f6, %f207;
	mov.f32 	%f9, %f203;
	bra.uni 	$Lt_89_311554;
$Lt_89_1026:
	// Handler for %r12 == 17 (source file 22, line 471).
	// With a = %f11 * %f9 and b = %f15:
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = min(max(D * S, 0), 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "multiply" composite -- the inputs are set
	// before this chunk, confirm there.
	.loc	22	471	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f225, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f225;
	mov.f32 	%f226, %f175;
	mov.f32 	%f227, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f228, %f175, %f227;
	mov.f32 	%f229, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p49, %f228, %f229;
	@!%p49 bra 	$Lt_89_275458;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f230, 0f00000000;   	// 0
	mov.f32 	%f231, 0f00000000;   	// 0
	mov.f32 	%f232, 0f00000000;   	// 0
	mov.f32 	%f226, 0f00000000;   	// 0
	bra.uni 	$Lt_89_275202;
$Lt_89_275458:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f233, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f233, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f234, 0f3f800000;   	// 1
	mul.ftz.f32 	%f235, %f172, %f185;
	sub.ftz.f32 	%f189, %f234, %f235;
	// Channel (%f12, %f2) -> %f232
	mul.ftz.f32 	%f236, %f12, %f2;
	mov.f32 	%f237, 0f00000000;   	// 0
	max.ftz.f32 	%f238, %f236, %f237;
	mov.f32 	%f239, 0f3f800000;   	// 1
	min.ftz.f32 	%f240, %f238, %f239;
	mul.ftz.f32 	%f241, %f15, %f240;
	fma.rn.ftz.f32 	%f242, %f2, %f184, %f241;
	mul.ftz.f32 	%f243, %f186, %f242;
	fma.rn.ftz.f32 	%f232, %f12, %f189, %f243;
	// Channel (%f13, %f4) -> %f231
	mul.ftz.f32 	%f244, %f13, %f4;
	mov.f32 	%f245, 0f00000000;   	// 0
	max.ftz.f32 	%f246, %f244, %f245;
	mov.f32 	%f247, 0f3f800000;   	// 1
	min.ftz.f32 	%f248, %f246, %f247;
	mul.ftz.f32 	%f249, %f15, %f248;
	fma.rn.ftz.f32 	%f250, %f4, %f184, %f249;
	mul.ftz.f32 	%f251, %f186, %f250;
	fma.rn.ftz.f32 	%f231, %f13, %f189, %f251;
	// Channel (%f14, %f6) -> %f230
	mul.ftz.f32 	%f252, %f14, %f6;
	mov.f32 	%f253, 0f00000000;   	// 0
	max.ftz.f32 	%f254, %f252, %f253;
	mov.f32 	%f255, 0f3f800000;   	// 1
	min.ftz.f32 	%f256, %f254, %f255;
	mul.ftz.f32 	%f257, %f15, %f256;
	fma.rn.ftz.f32 	%f258, %f6, %f184, %f257;
	mul.ftz.f32 	%f259, %f186, %f258;
	fma.rn.ftz.f32 	%f230, %f14, %f189, %f259;
$Lt_89_275202:
	.loc	6	193	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f232;
	mov.f32 	%f4, %f231;
	mov.f32 	%f6, %f230;
	mov.f32 	%f9, %f226;
	bra.uni 	$Lt_89_311554;
$Lt_89_1282:
	// Handler for %r12 == 22 (source file 22, line 472).
	// With a = %f11 * %f9, b = %f15 and clamp(x, lo, hi) = min(max(x, lo), hi):
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = clamp(1 - (1 - clamp(D, 1e-7, 1)) * (1 - clamp(S, 1e-7, 1)), 0, 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "screen" composite -- the inputs are set
	// before this chunk, confirm there.
	.loc	22	472	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f260, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f260;
	mov.f32 	%f261, %f175;
	mov.f32 	%f262, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f263, %f175, %f262;
	mov.f32 	%f264, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f263, %f264;
	@!%p50 bra 	$Lt_89_275970;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f265, 0f00000000;   	// 0
	mov.f32 	%f266, 0f00000000;   	// 0
	mov.f32 	%f267, 0f00000000;   	// 0
	mov.f32 	%f261, 0f00000000;   	// 0
	bra.uni 	$Lt_89_275714;
$Lt_89_275970:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f268, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f268, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f269, 0f3f800000;   	// 1
	mul.ftz.f32 	%f270, %f172, %f185;
	sub.ftz.f32 	%f189, %f269, %f270;
	// Channel (%f12, %f2) -> %f267
	mov.f32 	%f271, 0f3f800000;   	// 1
	mov.f32 	%f272, 0f3f800000;   	// 1
	mov.f32 	%f273, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f274, %f12, %f273;
	mov.f32 	%f275, 0f3f800000;   	// 1
	min.ftz.f32 	%f276, %f274, %f275;
	sub.ftz.f32 	%f277, %f272, %f276;
	mov.f32 	%f278, 0f3f800000;   	// 1
	mov.f32 	%f279, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f280, %f2, %f279;
	mov.f32 	%f281, 0f3f800000;   	// 1
	min.ftz.f32 	%f282, %f280, %f281;
	sub.ftz.f32 	%f283, %f278, %f282;
	mul.ftz.f32 	%f284, %f277, %f283;
	sub.ftz.f32 	%f285, %f271, %f284;
	mov.f32 	%f286, 0f00000000;   	// 0
	max.ftz.f32 	%f287, %f285, %f286;
	mov.f32 	%f288, 0f3f800000;   	// 1
	min.ftz.f32 	%f289, %f287, %f288;
	mul.ftz.f32 	%f290, %f15, %f289;
	fma.rn.ftz.f32 	%f291, %f2, %f184, %f290;
	mul.ftz.f32 	%f292, %f186, %f291;
	fma.rn.ftz.f32 	%f267, %f12, %f189, %f292;
	// Channel (%f13, %f4) -> %f266
	mov.f32 	%f293, 0f3f800000;   	// 1
	mov.f32 	%f294, 0f3f800000;   	// 1
	mov.f32 	%f295, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f296, %f13, %f295;
	mov.f32 	%f297, 0f3f800000;   	// 1
	min.ftz.f32 	%f298, %f296, %f297;
	sub.ftz.f32 	%f299, %f294, %f298;
	mov.f32 	%f300, 0f3f800000;   	// 1
	mov.f32 	%f301, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f302, %f4, %f301;
	mov.f32 	%f303, 0f3f800000;   	// 1
	min.ftz.f32 	%f304, %f302, %f303;
	sub.ftz.f32 	%f305, %f300, %f304;
	mul.ftz.f32 	%f306, %f299, %f305;
	sub.ftz.f32 	%f307, %f293, %f306;
	mov.f32 	%f308, 0f00000000;   	// 0
	max.ftz.f32 	%f309, %f307, %f308;
	mov.f32 	%f310, 0f3f800000;   	// 1
	min.ftz.f32 	%f311, %f309, %f310;
	mul.ftz.f32 	%f312, %f15, %f311;
	fma.rn.ftz.f32 	%f313, %f4, %f184, %f312;
	mul.ftz.f32 	%f314, %f186, %f313;
	fma.rn.ftz.f32 	%f266, %f13, %f189, %f314;
	// Channel (%f14, %f6) -> %f265
	mov.f32 	%f315, 0f3f800000;   	// 1
	mov.f32 	%f316, 0f3f800000;   	// 1
	mov.f32 	%f317, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f318, %f14, %f317;
	mov.f32 	%f319, 0f3f800000;   	// 1
	min.ftz.f32 	%f320, %f318, %f319;
	sub.ftz.f32 	%f321, %f316, %f320;
	mov.f32 	%f322, 0f3f800000;   	// 1
	mov.f32 	%f323, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f324, %f6, %f323;
	mov.f32 	%f325, 0f3f800000;   	// 1
	min.ftz.f32 	%f326, %f324, %f325;
	sub.ftz.f32 	%f327, %f322, %f326;
	mul.ftz.f32 	%f328, %f321, %f327;
	sub.ftz.f32 	%f329, %f315, %f328;
	mov.f32 	%f330, 0f00000000;   	// 0
	max.ftz.f32 	%f331, %f329, %f330;
	mov.f32 	%f332, 0f3f800000;   	// 1
	min.ftz.f32 	%f333, %f331, %f332;
	mul.ftz.f32 	%f334, %f15, %f333;
	fma.rn.ftz.f32 	%f335, %f6, %f184, %f334;
	mul.ftz.f32 	%f336, %f186, %f335;
	fma.rn.ftz.f32 	%f265, %f14, %f189, %f336;
$Lt_89_275714:
	.loc	6	194	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f267;
	mov.f32 	%f4, %f266;
	mov.f32 	%f6, %f265;
	mov.f32 	%f9, %f261;
	bra.uni 	$Lt_89_311554;
$Lt_89_1538:
	// Handler for %r12 == 6 (source file 22, lines 526-537).
	// Picks, per thread, one of two whole 4-tuples -- no arithmetic mixing:
	//   a = %f11 * %f9
	//   a - 8e-6 <= 0   -> result = (%f12, %f13, %f14, %f15)
	//   a + 8e-6 >= 1   -> result = (%f2,  %f4,  %f6,  %f9)
	//   otherwise       -> compare trunc(a * 32767) with a 15-bit pseudo-random
	//                      value derived from the thread's global x/y index:
	//                      threshold <  random -> (%f12, %f13, %f14, %f15)
	//                      threshold >= random -> (%f2,  %f4,  %f6,  %f9)
	// The result is written back to (%f2, %f4, %f6, %f9).
	// NOTE(review): presumably a "dissolve"-style mode -- confirm against
	// source file 22.
	.loc	22	526	0
	mul.ftz.f32 	%f172, %f11, %f9;
	mov.f32 	%f337, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f338, %f172, %f337;
	mov.f32 	%f339, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p51, %f338, %f339;
	@!%p51 bra 	$Lt_89_231170;
	.loc	22	528	0
	// a is (nearly) zero: take the %f12..%f15 tuple unchanged.
	mov.f32 	%f340, %f12;
	mov.f32 	%f341, %f13;
	mov.f32 	%f342, %f14;
	mov.f32 	%f343, %f15;
	bra.uni 	$LDWendi__Z4Randj_266_56;
$Lt_89_231170:
	.loc	22	530	0
	mov.f32 	%f344, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f345, %f172, %f344;
	mov.f32 	%f346, 0f3f800000;   	// 1
	setp.ge.ftz.f32 	%p52, %f345, %f346;
	@!%p52 bra 	$Lt_89_231426;
	.loc	22	532	0
	// a is (nearly) one: keep the %f2/%f4/%f6/%f9 tuple unchanged.
	mov.f32 	%f340, %f2;
	mov.f32 	%f341, %f4;
	mov.f32 	%f342, %f6;
	mov.f32 	%f343, %f9;
	bra.uni 	$LDWendi__Z4Randj_266_56;
$Lt_89_231426:
	.loc	21	143	0
	// Global thread coordinates: %r61 = ctaid.y*ntid.y + tid.y,
	//                            %r63 = ctaid.x*ntid.x + tid.x.
	// They are scrambled together with the constant 1 by a chain of
	// subtract / shift / xor steps; the shift amounts 13, 8, 13, 12, 16, 5,
	// 3, 10, 15 match Bob Jenkins' 96-bit integer mix.
	// NOTE(review): presumably the inlined body of Mix3(uint&, uint&, uint&)
	// declared at the top of the file -- confirm against source file 21.
	cvt.s32.u32 	%r54, %ctaid.y;
	cvt.s32.u32 	%r55, %ntid.y;
	mul.lo.s32 	%r56, %r54, %r55;
	cvt.s32.u32 	%r57, %ctaid.x;
	cvt.s32.u32 	%r58, %ntid.x;
	mul.lo.s32 	%r59, %r57, %r58;
	mov.u32 	%r60, %tid.y;
	add.u32 	%r61, %r56, %r60;
	mov.u32 	%r62, %tid.x;
	add.u32 	%r63, %r59, %r62;
	shr.u32 	%r64, %r61, 13;
	mov.s32 	%r65, 1;
	sub.s32 	%r66, %r65, %r63;
	sub.u32 	%r67, %r63, %r61;
	sub.u32 	%r68, %r66, %r61;
	xor.b32 	%r69, %r64, %r68;
	shl.b32 	%r70, %r69, 8;
	sub.u32 	%r71, %r67, %r69;
	sub.u32 	%r72, %r61, %r69;
	xor.b32 	%r73, %r70, %r71;
	shr.u32 	%r74, %r73, 13;
	sub.u32 	%r75, %r72, %r73;
	sub.u32 	%r76, %r69, %r73;
	xor.b32 	%r77, %r74, %r75;
	shr.u32 	%r78, %r77, 12;
	sub.u32 	%r79, %r76, %r77;
	xor.b32 	%r80, %r78, %r79;
	sub.u32 	%r81, %r73, %r77;
	sub.u32 	%r82, %r81, %r80;
	shl.b32 	%r83, %r80, 16;
	xor.b32 	%r84, %r82, %r83;
	.loc	21	144	0
	sub.u32 	%r85, %r77, %r80;
	sub.u32 	%r86, %r85, %r84;
	shr.u32 	%r87, %r84, 5;
	xor.b32 	%r88, %r86, %r87;
	.loc	21	145	0
	sub.u32 	%r89, %r80, %r84;
	sub.u32 	%r90, %r89, %r88;
	shr.u32 	%r91, %r88, 3;
	xor.b32 	%r92, %r90, %r91;
	.loc	21	146	0
	sub.u32 	%r93, %r84, %r88;
	sub.u32 	%r94, %r93, %r92;
	shl.b32 	%r95, %r92, 10;
	xor.b32 	%r96, %r94, %r95;
	.loc	21	147	0
	sub.u32 	%r97, %r88, %r92;
	sub.u32 	%r98, %r97, %r96;
	shr.u32 	%r99, %r96, 15;
	xor.b32 	%r100, %r98, %r99;
	.loc	22	537	0
	// %r101 = threshold = trunc-toward-zero(a * 32767).
	mov.f32 	%f347, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f348, %f172, %f347;
	cvt.rzi.ftz.s32.f32 	%r101, %f348;
	// Two values are derived from the hash %r100: %r103 = h*1103515245 + 12345
	// and %r108 = h*(-1029531031) - 740551042. Bits 16..23 of each are
	// combined as (hi << 7) ^ lo, giving a value below 2^15 in %r111.
	// NOTE(review): the second multiplier/offset pair looks like the first
	// linear-congruential step applied twice and folded by the compiler --
	// confirm against the Rand() source.
	mul.lo.u32 	%r102, %r100, 1103515245;
	add.u32 	%r103, %r102, 12345;
	shr.u32 	%r104, %r103, 16;
	and.b32 	%r105, %r104, 255;
	shl.b32 	%r106, %r105, 7;
	mul.lo.u32 	%r107, %r100, -1029531031;
	sub.u32 	%r108, %r107, 740551042;
	shr.u32 	%r109, %r108, 16;
	and.b32 	%r110, %r109, 255;
	xor.b32 	%r111, %r106, %r110;
	setp.lt.s32 	%p53, %r101, %r111;
	@%p53 bra 	$Lt_89_276482;
	// threshold >= random: keep the %f2/%f4/%f6/%f9 tuple.
	mov.f32 	%f349, %f2;
	mov.f32 	%f350, %f4;
	mov.f32 	%f351, %f6;
	mov.f32 	%f352, %f9;
	bra.uni 	$Lt_89_276226;
$Lt_89_276482:
	// threshold < random: take the %f12..%f15 tuple.
	mov.f32 	%f349, %f12;
	mov.f32 	%f350, %f13;
	mov.f32 	%f351, %f14;
	mov.f32 	%f352, %f15;
$Lt_89_276226:
	mov.f32 	%f340, %f349;
	mov.f32 	%f341, %f350;
	mov.f32 	%f342, %f351;
	mov.f32 	%f343, %f352;
$LDWendi__Z4Randj_266_56:
	.loc	6	195	0
	// Commit the selected tuple and leave the switch.
	mov.f32 	%f2, %f340;
	mov.f32 	%f4, %f341;
	mov.f32 	%f6, %f342;
	mov.f32 	%f9, %f343;
	bra.uni 	$Lt_89_311554;
$Lt_89_1794:
	// Handler for %r12 == 1 (source file 22, line 473).
	// With a = %f11 * %f9, b = %f15 and clamp(x, lo, hi) = min(max(x, lo), hi):
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = clamp(1 - (1 - clamp(D, 0, 1)) / clamp(S, 1e-7, 1), 0, 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// The 1e-7 lower clamp on S keeps the approximate divide away from a
	// zero divisor.
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "color burn" composite -- the inputs are set
	// before this chunk, confirm there.
	.loc	22	473	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f353, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f353;
	mov.f32 	%f354, %f175;
	mov.f32 	%f355, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f356, %f175, %f355;
	mov.f32 	%f357, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p54, %f356, %f357;
	@!%p54 bra 	$Lt_89_276994;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f358, 0f00000000;   	// 0
	mov.f32 	%f359, 0f00000000;   	// 0
	mov.f32 	%f360, 0f00000000;   	// 0
	mov.f32 	%f354, 0f00000000;   	// 0
	bra.uni 	$Lt_89_276738;
$Lt_89_276994:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f361, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f361, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f362, 0f3f800000;   	// 1
	mul.ftz.f32 	%f363, %f172, %f185;
	sub.ftz.f32 	%f189, %f362, %f363;
	// Channel (%f12, %f2) -> %f360
	mov.f32 	%f364, 0f3f800000;   	// 1
	mov.f32 	%f365, 0f3f800000;   	// 1
	mov.f32 	%f366, 0f00000000;   	// 0
	max.ftz.f32 	%f367, %f12, %f366;
	mov.f32 	%f368, 0f3f800000;   	// 1
	min.ftz.f32 	%f369, %f367, %f368;
	sub.ftz.f32 	%f370, %f365, %f369;
	mov.f32 	%f371, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f372, %f2, %f371;
	mov.f32 	%f373, 0f3f800000;   	// 1
	min.ftz.f32 	%f374, %f372, %f373;
	div.approx.ftz.f32 	%f375, %f370, %f374;
	sub.ftz.f32 	%f376, %f364, %f375;
	mov.f32 	%f377, 0f00000000;   	// 0
	max.ftz.f32 	%f378, %f376, %f377;
	mov.f32 	%f379, 0f3f800000;   	// 1
	min.ftz.f32 	%f380, %f378, %f379;
	mul.ftz.f32 	%f381, %f15, %f380;
	fma.rn.ftz.f32 	%f382, %f2, %f184, %f381;
	mul.ftz.f32 	%f383, %f186, %f382;
	fma.rn.ftz.f32 	%f360, %f12, %f189, %f383;
	// Channel (%f13, %f4) -> %f359
	mov.f32 	%f384, 0f3f800000;   	// 1
	mov.f32 	%f385, 0f3f800000;   	// 1
	mov.f32 	%f386, 0f00000000;   	// 0
	max.ftz.f32 	%f387, %f13, %f386;
	mov.f32 	%f388, 0f3f800000;   	// 1
	min.ftz.f32 	%f389, %f387, %f388;
	sub.ftz.f32 	%f390, %f385, %f389;
	mov.f32 	%f391, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f392, %f4, %f391;
	mov.f32 	%f393, 0f3f800000;   	// 1
	min.ftz.f32 	%f394, %f392, %f393;
	div.approx.ftz.f32 	%f395, %f390, %f394;
	sub.ftz.f32 	%f396, %f384, %f395;
	mov.f32 	%f397, 0f00000000;   	// 0
	max.ftz.f32 	%f398, %f396, %f397;
	mov.f32 	%f399, 0f3f800000;   	// 1
	min.ftz.f32 	%f400, %f398, %f399;
	mul.ftz.f32 	%f401, %f15, %f400;
	fma.rn.ftz.f32 	%f402, %f4, %f184, %f401;
	mul.ftz.f32 	%f403, %f186, %f402;
	fma.rn.ftz.f32 	%f359, %f13, %f189, %f403;
	// Channel (%f14, %f6) -> %f358
	mov.f32 	%f404, 0f3f800000;   	// 1
	mov.f32 	%f405, 0f3f800000;   	// 1
	mov.f32 	%f406, 0f00000000;   	// 0
	max.ftz.f32 	%f407, %f14, %f406;
	mov.f32 	%f408, 0f3f800000;   	// 1
	min.ftz.f32 	%f409, %f407, %f408;
	sub.ftz.f32 	%f410, %f405, %f409;
	mov.f32 	%f411, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f412, %f6, %f411;
	mov.f32 	%f413, 0f3f800000;   	// 1
	min.ftz.f32 	%f414, %f412, %f413;
	div.approx.ftz.f32 	%f415, %f410, %f414;
	sub.ftz.f32 	%f416, %f404, %f415;
	mov.f32 	%f417, 0f00000000;   	// 0
	max.ftz.f32 	%f418, %f416, %f417;
	mov.f32 	%f419, 0f3f800000;   	// 1
	min.ftz.f32 	%f420, %f418, %f419;
	mul.ftz.f32 	%f421, %f15, %f420;
	fma.rn.ftz.f32 	%f422, %f6, %f184, %f421;
	mul.ftz.f32 	%f423, %f186, %f422;
	fma.rn.ftz.f32 	%f358, %f14, %f189, %f423;
$Lt_89_276738:
	.loc	6	196	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f360;
	mov.f32 	%f4, %f359;
	mov.f32 	%f6, %f358;
	mov.f32 	%f9, %f354;
	bra.uni 	$Lt_89_311554;
$Lt_89_2050:
	// Handler for %r12 == 13 (source file 22, line 474).
	// With a = %f11 * %f9, b = %f15 and clamp(x, lo, hi) = min(max(x, lo), hi):
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = clamp(clamp(D, 0, 1) + clamp(S, 0, 1) - 1, 0, 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "linear burn" composite -- the inputs are
	// set before this chunk, confirm there.
	.loc	22	474	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f424, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f424;
	mov.f32 	%f425, %f175;
	mov.f32 	%f426, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f427, %f175, %f426;
	mov.f32 	%f428, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p55, %f427, %f428;
	@!%p55 bra 	$Lt_89_277506;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f429, 0f00000000;   	// 0
	mov.f32 	%f430, 0f00000000;   	// 0
	mov.f32 	%f431, 0f00000000;   	// 0
	mov.f32 	%f425, 0f00000000;   	// 0
	bra.uni 	$Lt_89_277250;
$Lt_89_277506:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f432, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f432, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f433, 0f3f800000;   	// 1
	mul.ftz.f32 	%f434, %f172, %f185;
	sub.ftz.f32 	%f189, %f433, %f434;
	// Channel (%f12, %f2) -> %f431
	mov.f32 	%f435, 0f00000000;   	// 0
	max.ftz.f32 	%f436, %f12, %f435;
	mov.f32 	%f437, 0f3f800000;   	// 1
	min.ftz.f32 	%f438, %f436, %f437;
	mov.f32 	%f439, 0f00000000;   	// 0
	max.ftz.f32 	%f440, %f2, %f439;
	mov.f32 	%f441, 0f3f800000;   	// 1
	min.ftz.f32 	%f442, %f440, %f441;
	add.ftz.f32 	%f443, %f438, %f442;
	mov.f32 	%f444, 0fbf800000;   	// -1
	add.ftz.f32 	%f445, %f443, %f444;
	mov.f32 	%f446, 0f00000000;   	// 0
	max.ftz.f32 	%f447, %f445, %f446;
	mov.f32 	%f448, 0f3f800000;   	// 1
	min.ftz.f32 	%f449, %f447, %f448;
	mul.ftz.f32 	%f450, %f15, %f449;
	fma.rn.ftz.f32 	%f451, %f2, %f184, %f450;
	mul.ftz.f32 	%f452, %f186, %f451;
	fma.rn.ftz.f32 	%f431, %f12, %f189, %f452;
	// Channel (%f13, %f4) -> %f430
	mov.f32 	%f453, 0f00000000;   	// 0
	max.ftz.f32 	%f454, %f13, %f453;
	mov.f32 	%f455, 0f3f800000;   	// 1
	min.ftz.f32 	%f456, %f454, %f455;
	mov.f32 	%f457, 0f00000000;   	// 0
	max.ftz.f32 	%f458, %f4, %f457;
	mov.f32 	%f459, 0f3f800000;   	// 1
	min.ftz.f32 	%f460, %f458, %f459;
	add.ftz.f32 	%f461, %f456, %f460;
	mov.f32 	%f462, 0fbf800000;   	// -1
	add.ftz.f32 	%f463, %f461, %f462;
	mov.f32 	%f464, 0f00000000;   	// 0
	max.ftz.f32 	%f465, %f463, %f464;
	mov.f32 	%f466, 0f3f800000;   	// 1
	min.ftz.f32 	%f467, %f465, %f466;
	mul.ftz.f32 	%f468, %f15, %f467;
	fma.rn.ftz.f32 	%f469, %f4, %f184, %f468;
	mul.ftz.f32 	%f470, %f186, %f469;
	fma.rn.ftz.f32 	%f430, %f13, %f189, %f470;
	// Channel (%f14, %f6) -> %f429
	mov.f32 	%f471, 0f00000000;   	// 0
	max.ftz.f32 	%f472, %f14, %f471;
	mov.f32 	%f473, 0f3f800000;   	// 1
	min.ftz.f32 	%f474, %f472, %f473;
	mov.f32 	%f475, 0f00000000;   	// 0
	max.ftz.f32 	%f476, %f6, %f475;
	mov.f32 	%f477, 0f3f800000;   	// 1
	min.ftz.f32 	%f478, %f476, %f477;
	add.ftz.f32 	%f479, %f474, %f478;
	mov.f32 	%f480, 0fbf800000;   	// -1
	add.ftz.f32 	%f481, %f479, %f480;
	mov.f32 	%f482, 0f00000000;   	// 0
	max.ftz.f32 	%f483, %f481, %f482;
	mov.f32 	%f484, 0f3f800000;   	// 1
	min.ftz.f32 	%f485, %f483, %f484;
	mul.ftz.f32 	%f486, %f15, %f485;
	fma.rn.ftz.f32 	%f487, %f6, %f184, %f486;
	mul.ftz.f32 	%f488, %f186, %f487;
	fma.rn.ftz.f32 	%f429, %f14, %f189, %f488;
$Lt_89_277250:
	.loc	6	197	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f431;
	mov.f32 	%f4, %f430;
	mov.f32 	%f6, %f429;
	mov.f32 	%f9, %f425;
	bra.uni 	$Lt_89_311554;
$Lt_89_2306:
	// Handler for %r12 == 4 (source file 6, line 198; source file 22, line 608).
	// With a = %f11 * %f9 and b = %f15:
	//   a - 8e-6 <= 0  -> result = (%f12, %f13, %f14, %f15)
	//   b - 8e-6 <= 0  -> result = (%f2,  %f4,  %f6,  a)
	//   otherwise      -> compute a weighted sum ("luma") for each triple,
	//                     saturated to [0,1] by cvt.sat:
	//                       L(x, y, z) = sat(c[8]*x + c[0]*z + c[4]*y)
	//                     where c is the constant table kRGB32f_To_601YPbPr
	//                     when %r10 <= 720 and kRGB32f_To_709YPbPr when
	//                     %r10 > 720 (byte offsets 0, 4 and 8).
	//                     If L(%f12,%f13,%f14) > L(%f2,%f4,%f6), the result is
	//                     the plain composite below; otherwise it is
	//                     (%f12, %f13, %f14, %f15) unchanged.
	// Plain composite (source file 22, line 468):
	//   A = a + b - a*b;  all zeros if A - 8e-6 <= 0;  else per channel (D, S):
	//   t = a * rcp.approx(A);  out = D*(1 - t) + t*(S*b + (1 - b)*S)
	// The result is written back to (%f2, %f4, %f6, %f9).
	// NOTE(review): presumably a "darker color" mode, with %r10 a frame
	// dimension used to choose between Rec.601 and Rec.709 weights -- %r10 is
	// set before this chunk, confirm there.
	.loc	6	198	0
	mul.ftz.f32 	%f172, %f11, %f9;
	mov.f32 	%f489, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f490, %f172, %f489;
	mov.f32 	%f491, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p56, %f490, %f491;
	@!%p56 bra 	$Lt_89_278018;
	.loc	22	608	0
	// a is (nearly) zero: take the %f12..%f15 tuple unchanged.
	mov.f32 	%f492, %f12;
	mov.f32 	%f493, %f13;
	mov.f32 	%f494, %f14;
	mov.f32 	%f495, %f15;
	bra.uni 	$Lt_89_278786;
$Lt_89_278018:
	mov.f32 	%f496, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f497, %f15, %f496;
	mov.f32 	%f498, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p57, %f497, %f498;
	@!%p57 bra 	$Lt_89_278530;
	// b is (nearly) zero: keep %f2/%f4/%f6 and use a as the fourth component.
	mov.f32 	%f492, %f2;
	mov.f32 	%f493, %f4;
	mov.f32 	%f494, %f6;
	mov.f32 	%f495, %f172;
	bra.uni 	$Lt_89_278786;
$Lt_89_278530:
	// Choose the coefficient table by comparing %r10 with 720.
	mov.u32 	%r112, 720;
	setp.gt.s32 	%p58, %r10, %r112;
	@%p58 bra 	$Lt_89_279042;
	.loc	22	555	0
	// %r10 <= 720: weights from kRGB32f_To_601YPbPr.
	// %f505 = L(%f12,%f13,%f14), %f509 = L(%f2,%f4,%f6).
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f502, %f13, %f501;
	fma.rn.ftz.f32 	%f503, %f500, %f14, %f502;
	fma.rn.ftz.f32 	%f504, %f499, %f12, %f503;
	cvt.ftz.sat.f32.f32 	%f505, %f504;
	mul.ftz.f32 	%f506, %f501, %f4;
	fma.rn.ftz.f32 	%f507, %f500, %f6, %f506;
	fma.rn.ftz.f32 	%f508, %f499, %f2, %f507;
	cvt.ftz.sat.f32.f32 	%f509, %f508;
	setp.gt.ftz.f32 	%p59, %f505, %f509;
	@!%p59 bra 	$Lt_89_233218;
	.loc	22	468	0
	// Plain composite (601 branch).
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f510, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f510;
	mov.f32 	%f511, %f175;
	mov.f32 	%f512, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f513, %f175, %f512;
	mov.f32 	%f514, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p60, %f513, %f514;
	@!%p60 bra 	$Lt_89_279554;
	mov.f32 	%f515, 0f00000000;   	// 0
	mov.f32 	%f516, 0f00000000;   	// 0
	mov.f32 	%f517, 0f00000000;   	// 0
	mov.f32 	%f511, 0f00000000;   	// 0
	bra.uni 	$Lt_89_279298;
$Lt_89_279554:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t.
	mov.f32 	%f518, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f518, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f519, 0f3f800000;   	// 1
	mul.ftz.f32 	%f520, %f172, %f185;
	sub.ftz.f32 	%f189, %f519, %f520;
	mul.ftz.f32 	%f521, %f184, %f2;
	fma.rn.ftz.f32 	%f522, %f2, %f15, %f521;
	mul.ftz.f32 	%f523, %f186, %f522;
	fma.rn.ftz.f32 	%f517, %f12, %f189, %f523;
	mul.ftz.f32 	%f524, %f184, %f4;
	fma.rn.ftz.f32 	%f525, %f4, %f15, %f524;
	mul.ftz.f32 	%f526, %f186, %f525;
	fma.rn.ftz.f32 	%f516, %f13, %f189, %f526;
	mul.ftz.f32 	%f527, %f184, %f6;
	fma.rn.ftz.f32 	%f528, %f6, %f15, %f527;
	mul.ftz.f32 	%f529, %f186, %f528;
	fma.rn.ftz.f32 	%f515, %f14, %f189, %f529;
$Lt_89_279298:
	.loc	22	557	0
	mov.f32 	%f530, %f517;
	mov.f32 	%f531, %f516;
	mov.f32 	%f532, %f515;
	mov.f32 	%f533, %f511;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_266_54;
$Lt_89_233218:
	.loc	22	561	0
	// Comparison failed: take the %f12..%f15 tuple unchanged.
	mov.f32 	%f530, %f12;
	mov.f32 	%f531, %f13;
	mov.f32 	%f532, %f14;
	mov.f32 	%f533, %f15;
$LDWendi__Z10GetLuma6018PixelRGB_266_54:
	.loc	22	608	0
	mov.f32 	%f492, %f530;
	mov.f32 	%f493, %f531;
	mov.f32 	%f494, %f532;
	mov.f32 	%f495, %f533;
	bra.uni 	$Lt_89_278786;
$Lt_89_279042:
	.loc	22	569	0
	// %r10 > 720: same test with weights from kRGB32f_To_709YPbPr.
	// %f540 = L(%f12,%f13,%f14), %f544 = L(%f2,%f4,%f6).
	ld.const.f32 	%f534, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f535, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f536, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f537, %f13, %f536;
	fma.rn.ftz.f32 	%f538, %f535, %f14, %f537;
	fma.rn.ftz.f32 	%f539, %f534, %f12, %f538;
	cvt.ftz.sat.f32.f32 	%f540, %f539;
	mul.ftz.f32 	%f541, %f536, %f4;
	fma.rn.ftz.f32 	%f542, %f535, %f6, %f541;
	fma.rn.ftz.f32 	%f543, %f534, %f2, %f542;
	cvt.ftz.sat.f32.f32 	%f544, %f543;
	setp.gt.ftz.f32 	%p61, %f540, %f544;
	@!%p61 bra 	$Lt_89_233730;
	.loc	22	468	0
	// Plain composite (709 branch).
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f545, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f545;
	mov.f32 	%f546, %f175;
	mov.f32 	%f547, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f548, %f175, %f547;
	mov.f32 	%f549, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p62, %f548, %f549;
	@!%p62 bra 	$Lt_89_280066;
	mov.f32 	%f550, 0f00000000;   	// 0
	mov.f32 	%f551, 0f00000000;   	// 0
	mov.f32 	%f552, 0f00000000;   	// 0
	mov.f32 	%f546, 0f00000000;   	// 0
	bra.uni 	$Lt_89_279810;
$Lt_89_280066:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t.
	mov.f32 	%f553, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f553, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f554, 0f3f800000;   	// 1
	mul.ftz.f32 	%f555, %f172, %f185;
	sub.ftz.f32 	%f189, %f554, %f555;
	mul.ftz.f32 	%f556, %f184, %f2;
	fma.rn.ftz.f32 	%f557, %f2, %f15, %f556;
	mul.ftz.f32 	%f558, %f186, %f557;
	fma.rn.ftz.f32 	%f552, %f12, %f189, %f558;
	mul.ftz.f32 	%f559, %f184, %f4;
	fma.rn.ftz.f32 	%f560, %f4, %f15, %f559;
	mul.ftz.f32 	%f561, %f186, %f560;
	fma.rn.ftz.f32 	%f551, %f13, %f189, %f561;
	mul.ftz.f32 	%f562, %f184, %f6;
	fma.rn.ftz.f32 	%f563, %f6, %f15, %f562;
	mul.ftz.f32 	%f564, %f186, %f563;
	fma.rn.ftz.f32 	%f550, %f14, %f189, %f564;
$Lt_89_279810:
	.loc	22	571	0
	mov.f32 	%f565, %f552;
	mov.f32 	%f566, %f551;
	mov.f32 	%f567, %f550;
	mov.f32 	%f568, %f546;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_266_52;
$Lt_89_233730:
	.loc	22	575	0
	// Comparison failed: take the %f12..%f15 tuple unchanged.
	mov.f32 	%f565, %f12;
	mov.f32 	%f566, %f13;
	mov.f32 	%f567, %f14;
	mov.f32 	%f568, %f15;
$LDWendi__Z10GetLuma7098PixelRGB_266_52:
	.loc	22	608	0
	mov.f32 	%f492, %f565;
	mov.f32 	%f493, %f566;
	mov.f32 	%f494, %f567;
	mov.f32 	%f495, %f568;
$Lt_89_278786:
$Lt_89_278274:
$Lt_89_277762:
	.loc	6	198	0
	// Commit the selected tuple and leave the switch.
	mov.f32 	%f2, %f492;
	mov.f32 	%f4, %f493;
	mov.f32 	%f6, %f494;
	mov.f32 	%f9, %f495;
	bra.uni 	$Lt_89_311554;
$Lt_89_2562:
	// Handler for %r12 == 2 (source file 22, line 475).
	// With a = %f11 * %f9, b = %f15 and clamp(x, lo, hi) = min(max(x, lo), hi):
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = clamp(clamp(D, 0, 1) / (1 - clamp(S, 0, 0.99999)), 0, 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// The 0.99999 upper clamp on S keeps the approximate divide away from a
	// zero divisor.
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "color dodge" composite -- the inputs are
	// set before this chunk, confirm there.
	.loc	22	475	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f569, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f569;
	mov.f32 	%f570, %f175;
	mov.f32 	%f571, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f572, %f175, %f571;
	mov.f32 	%f573, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p63, %f572, %f573;
	@!%p63 bra 	$Lt_89_280578;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f574, 0f00000000;   	// 0
	mov.f32 	%f575, 0f00000000;   	// 0
	mov.f32 	%f576, 0f00000000;   	// 0
	mov.f32 	%f570, 0f00000000;   	// 0
	bra.uni 	$Lt_89_280322;
$Lt_89_280578:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f577, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f577, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f578, 0f3f800000;   	// 1
	mul.ftz.f32 	%f579, %f172, %f185;
	sub.ftz.f32 	%f189, %f578, %f579;
	// Channel (%f12, %f2) -> %f576
	mov.f32 	%f580, 0f00000000;   	// 0
	max.ftz.f32 	%f581, %f12, %f580;
	mov.f32 	%f582, 0f3f800000;   	// 1
	min.ftz.f32 	%f583, %f581, %f582;
	mov.f32 	%f584, 0f3f800000;   	// 1
	mov.f32 	%f585, 0f00000000;   	// 0
	max.ftz.f32 	%f586, %f2, %f585;
	mov.f32 	%f587, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f588, %f586, %f587;
	sub.ftz.f32 	%f589, %f584, %f588;
	div.approx.ftz.f32 	%f590, %f583, %f589;
	mov.f32 	%f591, 0f00000000;   	// 0
	max.ftz.f32 	%f592, %f590, %f591;
	mov.f32 	%f593, 0f3f800000;   	// 1
	min.ftz.f32 	%f594, %f592, %f593;
	mul.ftz.f32 	%f595, %f15, %f594;
	fma.rn.ftz.f32 	%f596, %f2, %f184, %f595;
	mul.ftz.f32 	%f597, %f186, %f596;
	fma.rn.ftz.f32 	%f576, %f12, %f189, %f597;
	// Channel (%f13, %f4) -> %f575
	mov.f32 	%f598, 0f00000000;   	// 0
	max.ftz.f32 	%f599, %f13, %f598;
	mov.f32 	%f600, 0f3f800000;   	// 1
	min.ftz.f32 	%f601, %f599, %f600;
	mov.f32 	%f602, 0f3f800000;   	// 1
	mov.f32 	%f603, 0f00000000;   	// 0
	max.ftz.f32 	%f604, %f4, %f603;
	mov.f32 	%f605, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f606, %f604, %f605;
	sub.ftz.f32 	%f607, %f602, %f606;
	div.approx.ftz.f32 	%f608, %f601, %f607;
	mov.f32 	%f609, 0f00000000;   	// 0
	max.ftz.f32 	%f610, %f608, %f609;
	mov.f32 	%f611, 0f3f800000;   	// 1
	min.ftz.f32 	%f612, %f610, %f611;
	mul.ftz.f32 	%f613, %f15, %f612;
	fma.rn.ftz.f32 	%f614, %f4, %f184, %f613;
	mul.ftz.f32 	%f615, %f186, %f614;
	fma.rn.ftz.f32 	%f575, %f13, %f189, %f615;
	// Channel (%f14, %f6) -> %f574
	mov.f32 	%f616, 0f00000000;   	// 0
	max.ftz.f32 	%f617, %f14, %f616;
	mov.f32 	%f618, 0f3f800000;   	// 1
	min.ftz.f32 	%f619, %f617, %f618;
	mov.f32 	%f620, 0f3f800000;   	// 1
	mov.f32 	%f621, 0f00000000;   	// 0
	max.ftz.f32 	%f622, %f6, %f621;
	mov.f32 	%f623, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f624, %f622, %f623;
	sub.ftz.f32 	%f625, %f620, %f624;
	div.approx.ftz.f32 	%f626, %f619, %f625;
	mov.f32 	%f627, 0f00000000;   	// 0
	max.ftz.f32 	%f628, %f626, %f627;
	mov.f32 	%f629, 0f3f800000;   	// 1
	min.ftz.f32 	%f630, %f628, %f629;
	mul.ftz.f32 	%f631, %f15, %f630;
	fma.rn.ftz.f32 	%f632, %f6, %f184, %f631;
	mul.ftz.f32 	%f633, %f186, %f632;
	fma.rn.ftz.f32 	%f574, %f14, %f189, %f633;
$Lt_89_280322:
	.loc	6	199	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f576;
	mov.f32 	%f4, %f575;
	mov.f32 	%f6, %f574;
	mov.f32 	%f9, %f570;
	bra.uni 	$Lt_89_311554;
$Lt_89_2818:
	// Handler for %r12 == 14 (source file 22, line 476).
	// With a = %f11 * %f9 and b = %f15:
	//   A = a + b - a*b
	//   if (A - 8e-6 <= 0)  all four results are 0
	//   else, for each channel pair (D, S) = (%f12,%f2), (%f13,%f4), (%f14,%f6):
	//     B   = min(max(D + S, 0), 1)
	//     t   = a * rcp.approx(A)
	//     out = D*(1 - t) + t*(S*(1 - b) + b*B)
	// Results are written back to %f2, %f4, %f6 (channels) and %f9 (A).
	// NOTE(review): presumably a "linear dodge / add" composite -- the inputs
	// are set before this chunk, confirm there.
	.loc	22	476	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f634, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f634;
	mov.f32 	%f635, %f175;
	mov.f32 	%f636, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f637, %f175, %f636;
	mov.f32 	%f638, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p64, %f637, %f638;
	@!%p64 bra 	$Lt_89_281090;
	// A is (nearly) zero: avoid the reciprocal and emit all zeros.
	mov.f32 	%f639, 0f00000000;   	// 0
	mov.f32 	%f640, 0f00000000;   	// 0
	mov.f32 	%f641, 0f00000000;   	// 0
	mov.f32 	%f635, 0f00000000;   	// 0
	bra.uni 	$Lt_89_280834;
$Lt_89_281090:
	// %f184 = 1 - b, %f186 = t, %f189 = 1 - t (shared by all three channels).
	mov.f32 	%f642, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f642, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f643, 0f3f800000;   	// 1
	mul.ftz.f32 	%f644, %f172, %f185;
	sub.ftz.f32 	%f189, %f643, %f644;
	// Channel (%f12, %f2) -> %f641
	add.ftz.f32 	%f645, %f12, %f2;
	mov.f32 	%f646, 0f00000000;   	// 0
	max.ftz.f32 	%f647, %f645, %f646;
	mov.f32 	%f648, 0f3f800000;   	// 1
	min.ftz.f32 	%f649, %f647, %f648;
	mul.ftz.f32 	%f650, %f15, %f649;
	fma.rn.ftz.f32 	%f651, %f2, %f184, %f650;
	mul.ftz.f32 	%f652, %f186, %f651;
	fma.rn.ftz.f32 	%f641, %f12, %f189, %f652;
	// Channel (%f13, %f4) -> %f640
	add.ftz.f32 	%f653, %f13, %f4;
	mov.f32 	%f654, 0f00000000;   	// 0
	max.ftz.f32 	%f655, %f653, %f654;
	mov.f32 	%f656, 0f3f800000;   	// 1
	min.ftz.f32 	%f657, %f655, %f656;
	mul.ftz.f32 	%f658, %f15, %f657;
	fma.rn.ftz.f32 	%f659, %f4, %f184, %f658;
	mul.ftz.f32 	%f660, %f186, %f659;
	fma.rn.ftz.f32 	%f640, %f13, %f189, %f660;
	// Channel (%f14, %f6) -> %f639
	add.ftz.f32 	%f661, %f14, %f6;
	mov.f32 	%f662, 0f00000000;   	// 0
	max.ftz.f32 	%f663, %f661, %f662;
	mov.f32 	%f664, 0f3f800000;   	// 1
	min.ftz.f32 	%f665, %f663, %f664;
	mul.ftz.f32 	%f666, %f15, %f665;
	fma.rn.ftz.f32 	%f667, %f6, %f184, %f666;
	mul.ftz.f32 	%f668, %f186, %f667;
	fma.rn.ftz.f32 	%f639, %f14, %f189, %f668;
$Lt_89_280834:
	.loc	6	200	0
	// Commit the results and leave the switch.
	mov.f32 	%f2, %f641;
	mov.f32 	%f4, %f640;
	mov.f32 	%f6, %f639;
	mov.f32 	%f9, %f635;
	bra.uni 	$Lt_89_311554;
$Lt_89_3074:
	.loc	6	201	0
	mul.ftz.f32 	%f172, %f11, %f9;
	mov.f32 	%f669, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f670, %f172, %f669;
	mov.f32 	%f671, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p65, %f670, %f671;
	@!%p65 bra 	$Lt_89_281602;
	.loc	22	609	0
	mov.f32 	%f672, %f12;
	mov.f32 	%f673, %f13;
	mov.f32 	%f674, %f14;
	mov.f32 	%f675, %f15;
	bra.uni 	$Lt_89_282370;
$Lt_89_281602:
	mov.f32 	%f676, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f677, %f15, %f676;
	mov.f32 	%f678, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p66, %f677, %f678;
	@!%p66 bra 	$Lt_89_282114;
	mov.f32 	%f672, %f2;
	mov.f32 	%f673, %f4;
	mov.f32 	%f674, %f6;
	mov.f32 	%f675, %f172;
	bra.uni 	$Lt_89_282370;
$Lt_89_282114:
	mov.u32 	%r113, 720;
	setp.gt.s32 	%p67, %r10, %r113;
	@%p67 bra 	$Lt_89_282626;
	.loc	22	584	0
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f679, %f13, %f501;
	fma.rn.ftz.f32 	%f680, %f500, %f14, %f679;
	fma.rn.ftz.f32 	%f681, %f499, %f12, %f680;
	cvt.ftz.sat.f32.f32 	%f682, %f681;
	mul.ftz.f32 	%f683, %f501, %f4;
	fma.rn.ftz.f32 	%f684, %f500, %f6, %f683;
	fma.rn.ftz.f32 	%f685, %f499, %f2, %f684;
	cvt.ftz.sat.f32.f32 	%f686, %f685;
	setp.lt.ftz.f32 	%p68, %f682, %f686;
	@!%p68 bra 	$Lt_89_235522;
	.loc	22	468	0
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f687, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f687;
	mov.f32 	%f688, %f175;
	mov.f32 	%f689, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f690, %f175, %f689;
	mov.f32 	%f691, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p69, %f690, %f691;
	@!%p69 bra 	$Lt_89_283138;
	mov.f32 	%f692, 0f00000000;   	// 0
	mov.f32 	%f693, 0f00000000;   	// 0
	mov.f32 	%f694, 0f00000000;   	// 0
	mov.f32 	%f688, 0f00000000;   	// 0
	bra.uni 	$Lt_89_282882;
$Lt_89_283138:
	mov.f32 	%f695, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f695, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f696, 0f3f800000;   	// 1
	mul.ftz.f32 	%f697, %f172, %f185;
	sub.ftz.f32 	%f189, %f696, %f697;
	mul.ftz.f32 	%f698, %f184, %f2;
	fma.rn.ftz.f32 	%f699, %f2, %f15, %f698;
	mul.ftz.f32 	%f700, %f186, %f699;
	fma.rn.ftz.f32 	%f694, %f12, %f189, %f700;
	mul.ftz.f32 	%f701, %f184, %f4;
	fma.rn.ftz.f32 	%f702, %f4, %f15, %f701;
	mul.ftz.f32 	%f703, %f186, %f702;
	fma.rn.ftz.f32 	%f693, %f13, %f189, %f703;
	mul.ftz.f32 	%f704, %f184, %f6;
	fma.rn.ftz.f32 	%f705, %f6, %f15, %f704;
	mul.ftz.f32 	%f706, %f186, %f705;
	fma.rn.ftz.f32 	%f692, %f14, %f189, %f706;
$Lt_89_282882:
	.loc	22	586	0
	mov.f32 	%f707, %f694;
	mov.f32 	%f708, %f693;
	mov.f32 	%f709, %f692;
	mov.f32 	%f710, %f688;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_266_50;
$Lt_89_235522:
	.loc	22	590	0
	mov.f32 	%f707, %f12;
	mov.f32 	%f708, %f13;
	mov.f32 	%f709, %f14;
	mov.f32 	%f710, %f15;
$LDWendi__Z10GetLuma6018PixelRGB_266_50:
	.loc	22	609	0
	mov.f32 	%f672, %f707;
	mov.f32 	%f673, %f708;
	mov.f32 	%f674, %f709;
	mov.f32 	%f675, %f710;
	bra.uni 	$Lt_89_282370;
$Lt_89_282626:
	.loc	22	598	0
	ld.const.f32 	%f711, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f712, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f713, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f714, %f13, %f713;
	fma.rn.ftz.f32 	%f715, %f712, %f14, %f714;
	fma.rn.ftz.f32 	%f716, %f711, %f12, %f715;
	cvt.ftz.sat.f32.f32 	%f717, %f716;
	mul.ftz.f32 	%f718, %f713, %f4;
	fma.rn.ftz.f32 	%f719, %f712, %f6, %f718;
	fma.rn.ftz.f32 	%f720, %f711, %f2, %f719;
	cvt.ftz.sat.f32.f32 	%f721, %f720;
	setp.lt.ftz.f32 	%p70, %f717, %f721;
	@!%p70 bra 	$Lt_89_236034;
	.loc	22	468	0
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f722, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f722;
	mov.f32 	%f723, %f175;
	mov.f32 	%f724, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f725, %f175, %f724;
	mov.f32 	%f726, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p71, %f725, %f726;
	@!%p71 bra 	$Lt_89_283650;
	mov.f32 	%f727, 0f00000000;   	// 0
	mov.f32 	%f728, 0f00000000;   	// 0
	mov.f32 	%f729, 0f00000000;   	// 0
	mov.f32 	%f723, 0f00000000;   	// 0
	bra.uni 	$Lt_89_283394;
$Lt_89_283650:
	mov.f32 	%f730, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f730, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f731, 0f3f800000;   	// 1
	mul.ftz.f32 	%f732, %f172, %f185;
	sub.ftz.f32 	%f189, %f731, %f732;
	mul.ftz.f32 	%f733, %f184, %f2;
	fma.rn.ftz.f32 	%f734, %f2, %f15, %f733;
	mul.ftz.f32 	%f735, %f186, %f734;
	fma.rn.ftz.f32 	%f729, %f12, %f189, %f735;
	mul.ftz.f32 	%f736, %f184, %f4;
	fma.rn.ftz.f32 	%f737, %f4, %f15, %f736;
	mul.ftz.f32 	%f738, %f186, %f737;
	fma.rn.ftz.f32 	%f728, %f13, %f189, %f738;
	mul.ftz.f32 	%f739, %f184, %f6;
	fma.rn.ftz.f32 	%f740, %f6, %f15, %f739;
	mul.ftz.f32 	%f741, %f186, %f740;
	fma.rn.ftz.f32 	%f727, %f14, %f189, %f741;
$Lt_89_283394:
	.loc	22	600	0
	mov.f32 	%f742, %f729;
	mov.f32 	%f743, %f728;
	mov.f32 	%f744, %f727;
	mov.f32 	%f745, %f723;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_266_48;
$Lt_89_236034:
	.loc	22	604	0
	mov.f32 	%f742, %f12;
	mov.f32 	%f743, %f13;
	mov.f32 	%f744, %f14;
	mov.f32 	%f745, %f15;
$LDWendi__Z10GetLuma7098PixelRGB_266_48:
	.loc	22	609	0
	mov.f32 	%f672, %f742;
	mov.f32 	%f673, %f743;
	mov.f32 	%f674, %f744;
	mov.f32 	%f675, %f745;
$Lt_89_282370:
$Lt_89_281858:
$Lt_89_281346:
	.loc	6	201	0
	mov.f32 	%f2, %f672;
	mov.f32 	%f4, %f673;
	mov.f32 	%f6, %f674;
	mov.f32 	%f9, %f675;
	bra.uni 	$Lt_89_311554;
$Lt_89_3330:
	// Case block for source line 22:477 (inlined helper at 22:373).
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): the math looks like a per-channel blend followed by alpha-style
	//   compositing; which register is "alpha"/"opacity" is inferred, not shown here - confirm.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c,0,1);  b = clamp(d,0,1)
	//       v = (a <= 0.5) ? a*(2b) : 1 - 2(1-b)(1-a)        // branch tests a (from %f12..%f14)
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*clamp(v,0,1) )
	.loc	22	477	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f746, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f746;
	mov.f32 	%f747, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f748, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f749, %f175, %f748;
	mov.f32 	%f750, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p72, %f749, %f750;
	@!%p72 bra 	$Lt_89_284162;
	mov.f32 	%f751, 0f00000000;   	// 0
	mov.f32 	%f752, 0f00000000;   	// 0
	mov.f32 	%f753, 0f00000000;   	// 0
	mov.f32 	%f747, 0f00000000;   	// 0
	bra.uni 	$Lt_89_283906;
$Lt_89_284162:
	// Channel 1 (%f12 with %f2): a = %f759, b = %f761, v = %f764.
	.loc	22	373	0
	mov.f32 	%f754, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f12, %f754;
	mov.f32 	%f756, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f2, %f756;
	mov.f32 	%f758, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f758;
	mov.f32 	%f760, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f760;
	mov.f32 	%f762, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p73, %f759, %f762;
	@!%p73 bra 	$Lt_89_284674;
	add.ftz.f32 	%f763, %f761, %f761;
	mul.ftz.f32 	%f764, %f759, %f763;
	bra.uni 	$Lt_89_284418;
$Lt_89_284674:
	mov.f32 	%f765, 0f3f800000;   	// 1
	sub.ftz.f32 	%f766, %f765, %f761;
	mov.f32 	%f767, 0f3f800000;   	// 1
	add.ftz.f32 	%f768, %f766, %f766;
	mov.f32 	%f769, 0f3f800000;   	// 1
	sub.ftz.f32 	%f770, %f769, %f759;
	mul.ftz.f32 	%f771, %f768, %f770;
	sub.ftz.f32 	%f764, %f767, %f771;
$Lt_89_284418:
	// Shared factors, computed once and reused by all three channels:
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	.loc	22	477	0
	mov.f32 	%f772, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f772, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f773, 0f3f800000;   	// 1
	mul.ftz.f32 	%f774, %f172, %f185;
	sub.ftz.f32 	%f189, %f773, %f774;
	mov.f32 	%f775, 0f00000000;   	// 0
	max.ftz.f32 	%f776, %f764, %f775;
	mov.f32 	%f777, 0f3f800000;   	// 1
	min.ftz.f32 	%f778, %f776, %f777;
	mul.ftz.f32 	%f779, %f15, %f778;
	fma.rn.ftz.f32 	%f780, %f2, %f184, %f779;
	mul.ftz.f32 	%f781, %f186, %f780;
	fma.rn.ftz.f32 	%f753, %f12, %f189, %f781;
	// Channel 2 (%f13 with %f4): a = %f787, b = %f789, v = %f792.
	.loc	22	373	0
	mov.f32 	%f782, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f13, %f782;
	mov.f32 	%f784, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f4, %f784;
	mov.f32 	%f786, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f786;
	mov.f32 	%f788, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f788;
	mov.f32 	%f790, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p74, %f787, %f790;
	@!%p74 bra 	$Lt_89_285186;
	add.ftz.f32 	%f791, %f789, %f789;
	mul.ftz.f32 	%f792, %f787, %f791;
	bra.uni 	$Lt_89_284930;
$Lt_89_285186:
	mov.f32 	%f793, 0f3f800000;   	// 1
	sub.ftz.f32 	%f794, %f793, %f789;
	mov.f32 	%f795, 0f3f800000;   	// 1
	add.ftz.f32 	%f796, %f794, %f794;
	mov.f32 	%f797, 0f3f800000;   	// 1
	sub.ftz.f32 	%f798, %f797, %f787;
	mul.ftz.f32 	%f799, %f796, %f798;
	sub.ftz.f32 	%f792, %f795, %f799;
$Lt_89_284930:
	.loc	22	477	0
	mov.f32 	%f800, 0f00000000;   	// 0
	max.ftz.f32 	%f801, %f792, %f800;
	mov.f32 	%f802, 0f3f800000;   	// 1
	min.ftz.f32 	%f803, %f801, %f802;
	mul.ftz.f32 	%f804, %f15, %f803;
	fma.rn.ftz.f32 	%f805, %f4, %f184, %f804;
	mul.ftz.f32 	%f806, %f186, %f805;
	fma.rn.ftz.f32 	%f752, %f13, %f189, %f806;
	// Channel 3 (%f14 with %f6): a = %f812, b = %f814, v = %f817.
	.loc	22	373	0
	mov.f32 	%f807, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f14, %f807;
	mov.f32 	%f809, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f6, %f809;
	mov.f32 	%f811, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f811;
	mov.f32 	%f813, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f813;
	mov.f32 	%f815, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p75, %f812, %f815;
	@!%p75 bra 	$Lt_89_285698;
	add.ftz.f32 	%f816, %f814, %f814;
	mul.ftz.f32 	%f817, %f812, %f816;
	bra.uni 	$Lt_89_285442;
$Lt_89_285698:
	mov.f32 	%f818, 0f3f800000;   	// 1
	sub.ftz.f32 	%f819, %f818, %f814;
	mov.f32 	%f820, 0f3f800000;   	// 1
	add.ftz.f32 	%f821, %f819, %f819;
	mov.f32 	%f822, 0f3f800000;   	// 1
	sub.ftz.f32 	%f823, %f822, %f812;
	mul.ftz.f32 	%f824, %f821, %f823;
	sub.ftz.f32 	%f817, %f820, %f824;
$Lt_89_285442:
	.loc	22	477	0
	mov.f32 	%f825, 0f00000000;   	// 0
	max.ftz.f32 	%f826, %f817, %f825;
	mov.f32 	%f827, 0f3f800000;   	// 1
	min.ftz.f32 	%f828, %f826, %f827;
	mul.ftz.f32 	%f829, %f15, %f828;
	fma.rn.ftz.f32 	%f830, %f6, %f184, %f829;
	mul.ftz.f32 	%f831, %f186, %f830;
	fma.rn.ftz.f32 	%f751, %f14, %f189, %f831;
$Lt_89_283906:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	202	0
	mov.f32 	%f2, %f753;
	mov.f32 	%f4, %f752;
	mov.f32 	%f6, %f751;
	mov.f32 	%f9, %f747;
	bra.uni 	$Lt_89_311554;
$Lt_89_3586:
	// Case block for source line 22:478 (inlined helper at 22:380).
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c,0,1);  b = clamp(d,0,1);  t = 2b - 1
	//       v = (b <= 0.5) ? a + t*(a - a*a) : a + t*(sqrt.approx(a) - a)
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*clamp(v,0,1) )
	.loc	22	478	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f832, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f832;
	mov.f32 	%f833, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f834, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f835, %f175, %f834;
	mov.f32 	%f836, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p76, %f835, %f836;
	@!%p76 bra 	$Lt_89_286210;
	mov.f32 	%f837, 0f00000000;   	// 0
	mov.f32 	%f838, 0f00000000;   	// 0
	mov.f32 	%f839, 0f00000000;   	// 0
	mov.f32 	%f833, 0f00000000;   	// 0
	bra.uni 	$Lt_89_285954;
$Lt_89_286210:
	// Channel 1 (%f12 with %f2): a = %f759, b = %f761, t = %f846, v = %f850.
	.loc	22	380	0
	mov.f32 	%f840, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f12, %f840;
	mov.f32 	%f841, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f2, %f841;
	mov.f32 	%f842, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f842;
	mov.f32 	%f843, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f843;
	add.ftz.f32 	%f844, %f761, %f761;
	mov.f32 	%f845, 0fbf800000;   	// -1
	add.ftz.f32 	%f846, %f844, %f845;
	mov.f32 	%f847, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p77, %f761, %f847;
	@!%p77 bra 	$Lt_89_286722;
	mul.ftz.f32 	%f848, %f759, %f759;
	sub.ftz.f32 	%f849, %f759, %f848;
	fma.rn.ftz.f32 	%f850, %f846, %f849, %f759;
	bra.uni 	$Lt_89_286466;
$Lt_89_286722:
	sqrt.approx.ftz.f32 	%f851, %f759;
	sub.ftz.f32 	%f852, %f851, %f759;
	fma.rn.ftz.f32 	%f850, %f846, %f852, %f759;
$Lt_89_286466:
	// Shared factors, computed once and reused by all three channels:
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	.loc	22	478	0
	mov.f32 	%f853, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f853, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f854, 0f3f800000;   	// 1
	mul.ftz.f32 	%f855, %f172, %f185;
	sub.ftz.f32 	%f189, %f854, %f855;
	mov.f32 	%f856, 0f00000000;   	// 0
	max.ftz.f32 	%f857, %f850, %f856;
	mov.f32 	%f858, 0f3f800000;   	// 1
	min.ftz.f32 	%f859, %f857, %f858;
	mul.ftz.f32 	%f860, %f15, %f859;
	fma.rn.ftz.f32 	%f861, %f2, %f184, %f860;
	mul.ftz.f32 	%f862, %f186, %f861;
	fma.rn.ftz.f32 	%f839, %f12, %f189, %f862;
	// Channel 2 (%f13 with %f4): a = %f787, b = %f789, t = %f869, v = %f873.
	.loc	22	380	0
	mov.f32 	%f863, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f13, %f863;
	mov.f32 	%f864, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f4, %f864;
	mov.f32 	%f865, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f865;
	mov.f32 	%f866, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f866;
	add.ftz.f32 	%f867, %f789, %f789;
	mov.f32 	%f868, 0fbf800000;   	// -1
	add.ftz.f32 	%f869, %f867, %f868;
	mov.f32 	%f870, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p78, %f789, %f870;
	@!%p78 bra 	$Lt_89_287234;
	mul.ftz.f32 	%f871, %f787, %f787;
	sub.ftz.f32 	%f872, %f787, %f871;
	fma.rn.ftz.f32 	%f873, %f869, %f872, %f787;
	bra.uni 	$Lt_89_286978;
$Lt_89_287234:
	sqrt.approx.ftz.f32 	%f874, %f787;
	sub.ftz.f32 	%f875, %f874, %f787;
	fma.rn.ftz.f32 	%f873, %f869, %f875, %f787;
$Lt_89_286978:
	.loc	22	478	0
	mov.f32 	%f876, 0f00000000;   	// 0
	max.ftz.f32 	%f877, %f873, %f876;
	mov.f32 	%f878, 0f3f800000;   	// 1
	min.ftz.f32 	%f879, %f877, %f878;
	mul.ftz.f32 	%f880, %f15, %f879;
	fma.rn.ftz.f32 	%f881, %f4, %f184, %f880;
	mul.ftz.f32 	%f882, %f186, %f881;
	fma.rn.ftz.f32 	%f838, %f13, %f189, %f882;
	// Channel 3 (%f14 with %f6): a = %f812, b = %f814, t = %f889, v = %f893.
	.loc	22	380	0
	mov.f32 	%f883, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f14, %f883;
	mov.f32 	%f884, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f6, %f884;
	mov.f32 	%f885, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f885;
	mov.f32 	%f886, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f886;
	add.ftz.f32 	%f887, %f814, %f814;
	mov.f32 	%f888, 0fbf800000;   	// -1
	add.ftz.f32 	%f889, %f887, %f888;
	mov.f32 	%f890, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p79, %f814, %f890;
	@!%p79 bra 	$Lt_89_287746;
	mul.ftz.f32 	%f891, %f812, %f812;
	sub.ftz.f32 	%f892, %f812, %f891;
	fma.rn.ftz.f32 	%f893, %f889, %f892, %f812;
	bra.uni 	$Lt_89_287490;
$Lt_89_287746:
	sqrt.approx.ftz.f32 	%f894, %f812;
	sub.ftz.f32 	%f895, %f894, %f812;
	fma.rn.ftz.f32 	%f893, %f889, %f895, %f812;
$Lt_89_287490:
	.loc	22	478	0
	mov.f32 	%f896, 0f00000000;   	// 0
	max.ftz.f32 	%f897, %f893, %f896;
	mov.f32 	%f898, 0f3f800000;   	// 1
	min.ftz.f32 	%f899, %f897, %f898;
	mul.ftz.f32 	%f900, %f15, %f899;
	fma.rn.ftz.f32 	%f901, %f6, %f184, %f900;
	mul.ftz.f32 	%f902, %f186, %f901;
	fma.rn.ftz.f32 	%f837, %f14, %f189, %f902;
$Lt_89_285954:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	203	0
	mov.f32 	%f2, %f839;
	mov.f32 	%f4, %f838;
	mov.f32 	%f6, %f837;
	mov.f32 	%f9, %f833;
	bra.uni 	$Lt_89_311554;
$Lt_89_3842:
	// Case block for source line 22:479 (inlined helper at 22:386).
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c,0,1);  b = clamp(d,0,1)
	//       v = (b <= 0.5) ? a*(2b) : 1 - 2(1-b)(1-a)        // branch tests b (from %f2..%f6)
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*clamp(v,0,1) )
	// Same formula as the 22:373 helper used by $Lt_89_3330, except the 0.5 test is on b, not a.
	.loc	22	479	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f903, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f903;
	mov.f32 	%f904, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f905, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f906, %f175, %f905;
	mov.f32 	%f907, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p80, %f906, %f907;
	@!%p80 bra 	$Lt_89_288258;
	mov.f32 	%f908, 0f00000000;   	// 0
	mov.f32 	%f909, 0f00000000;   	// 0
	mov.f32 	%f910, 0f00000000;   	// 0
	mov.f32 	%f904, 0f00000000;   	// 0
	bra.uni 	$Lt_89_288002;
$Lt_89_288258:
	// Channel 1 (%f12 with %f2): a = %f759, b = %f761, v = %f917.
	.loc	22	386	0
	mov.f32 	%f911, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f12, %f911;
	mov.f32 	%f912, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f2, %f912;
	mov.f32 	%f913, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f913;
	mov.f32 	%f914, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f914;
	mov.f32 	%f915, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p81, %f761, %f915;
	@!%p81 bra 	$Lt_89_288770;
	add.ftz.f32 	%f916, %f761, %f761;
	mul.ftz.f32 	%f917, %f759, %f916;
	bra.uni 	$Lt_89_288514;
$Lt_89_288770:
	mov.f32 	%f918, 0f3f800000;   	// 1
	sub.ftz.f32 	%f919, %f918, %f761;
	mov.f32 	%f920, 0f3f800000;   	// 1
	add.ftz.f32 	%f921, %f919, %f919;
	mov.f32 	%f922, 0f3f800000;   	// 1
	sub.ftz.f32 	%f923, %f922, %f759;
	mul.ftz.f32 	%f924, %f921, %f923;
	sub.ftz.f32 	%f917, %f920, %f924;
$Lt_89_288514:
	// Shared factors, computed once and reused by all three channels:
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	.loc	22	479	0
	mov.f32 	%f925, 0f3f800000;   	// 1
	sub.ftz.f32 	%f184, %f925, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f926, 0f3f800000;   	// 1
	mul.ftz.f32 	%f927, %f172, %f185;
	sub.ftz.f32 	%f189, %f926, %f927;
	mov.f32 	%f928, 0f00000000;   	// 0
	max.ftz.f32 	%f929, %f917, %f928;
	mov.f32 	%f930, 0f3f800000;   	// 1
	min.ftz.f32 	%f931, %f929, %f930;
	mul.ftz.f32 	%f932, %f15, %f931;
	fma.rn.ftz.f32 	%f933, %f2, %f184, %f932;
	mul.ftz.f32 	%f934, %f186, %f933;
	fma.rn.ftz.f32 	%f910, %f12, %f189, %f934;
	// Channel 2 (%f13 with %f4): a = %f787, b = %f789, v = %f941.
	.loc	22	386	0
	mov.f32 	%f935, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f13, %f935;
	mov.f32 	%f936, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f4, %f936;
	mov.f32 	%f937, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f937;
	mov.f32 	%f938, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f938;
	mov.f32 	%f939, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p82, %f789, %f939;
	@!%p82 bra 	$Lt_89_289282;
	add.ftz.f32 	%f940, %f789, %f789;
	mul.ftz.f32 	%f941, %f787, %f940;
	bra.uni 	$Lt_89_289026;
$Lt_89_289282:
	mov.f32 	%f942, 0f3f800000;   	// 1
	sub.ftz.f32 	%f943, %f942, %f789;
	mov.f32 	%f944, 0f3f800000;   	// 1
	add.ftz.f32 	%f945, %f943, %f943;
	mov.f32 	%f946, 0f3f800000;   	// 1
	sub.ftz.f32 	%f947, %f946, %f787;
	mul.ftz.f32 	%f948, %f945, %f947;
	sub.ftz.f32 	%f941, %f944, %f948;
$Lt_89_289026:
	.loc	22	479	0
	mov.f32 	%f949, 0f00000000;   	// 0
	max.ftz.f32 	%f950, %f941, %f949;
	mov.f32 	%f951, 0f3f800000;   	// 1
	min.ftz.f32 	%f952, %f950, %f951;
	mul.ftz.f32 	%f953, %f15, %f952;
	fma.rn.ftz.f32 	%f954, %f4, %f184, %f953;
	mul.ftz.f32 	%f955, %f186, %f954;
	fma.rn.ftz.f32 	%f909, %f13, %f189, %f955;
	// Channel 3 (%f14 with %f6): a = %f812, b = %f814, v = %f962.
	.loc	22	386	0
	mov.f32 	%f956, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f14, %f956;
	mov.f32 	%f957, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f6, %f957;
	mov.f32 	%f958, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f958;
	mov.f32 	%f959, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f959;
	mov.f32 	%f960, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p83, %f814, %f960;
	@!%p83 bra 	$Lt_89_289794;
	add.ftz.f32 	%f961, %f814, %f814;
	mul.ftz.f32 	%f962, %f812, %f961;
	bra.uni 	$Lt_89_289538;
$Lt_89_289794:
	mov.f32 	%f963, 0f3f800000;   	// 1
	sub.ftz.f32 	%f964, %f963, %f814;
	mov.f32 	%f965, 0f3f800000;   	// 1
	add.ftz.f32 	%f966, %f964, %f964;
	mov.f32 	%f967, 0f3f800000;   	// 1
	sub.ftz.f32 	%f968, %f967, %f812;
	mul.ftz.f32 	%f969, %f966, %f968;
	sub.ftz.f32 	%f962, %f965, %f969;
$Lt_89_289538:
	.loc	22	479	0
	mov.f32 	%f970, 0f00000000;   	// 0
	max.ftz.f32 	%f971, %f962, %f970;
	mov.f32 	%f972, 0f3f800000;   	// 1
	min.ftz.f32 	%f973, %f971, %f972;
	mul.ftz.f32 	%f974, %f15, %f973;
	fma.rn.ftz.f32 	%f975, %f6, %f184, %f974;
	mul.ftz.f32 	%f976, %f186, %f975;
	fma.rn.ftz.f32 	%f908, %f14, %f189, %f976;
$Lt_89_288002:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	204	0
	mov.f32 	%f2, %f910;
	mov.f32 	%f4, %f909;
	mov.f32 	%f6, %f908;
	mov.f32 	%f9, %f904;
	bra.uni 	$Lt_89_311554;
$Lt_89_4098:
	// Case block for source line 22:480 (inlined helper at 22:431..437).
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c, 0, 1);  b = clamp(d, 1e-6, 0.999999)
	//       v = (b <= 0.5) ? clamp(1 - (1-a)/(2b), 0, 1)      // 22:433
	//                      : clamp(a / (2(1-b)),   0, 1)      // 22:437
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*v )
	// b is clamped to [1e-6, 0.999999] rather than [0,1], so neither div.approx divisor
	// (2b or 2(1-b)) can be exactly zero.
	.loc	22	480	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f977, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f977;
	mov.f32 	%f978, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f979, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f980, %f175, %f979;
	mov.f32 	%f981, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p84, %f980, %f981;
	@!%p84 bra 	$Lt_89_290306;
	mov.f32 	%f982, 0f00000000;   	// 0
	mov.f32 	%f983, 0f00000000;   	// 0
	mov.f32 	%f984, 0f00000000;   	// 0
	mov.f32 	%f978, 0f00000000;   	// 0
	bra.uni 	$Lt_89_290050;
$Lt_89_290306:
	// Channel 1 (%f12 with %f2): a = %f759, b = %f990, v = %f1001.
	.loc	22	431	0
	mov.f32 	%f985, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f12, %f985;
	mov.f32 	%f986, 0f358637bd;   	// 1e-006
	max.ftz.f32 	%f987, %f2, %f986;
	mov.f32 	%f988, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f988;
	mov.f32 	%f989, 0f3f7fffef;   	// 0.999999
	min.ftz.f32 	%f990, %f987, %f989;
	mov.f32 	%f991, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p85, %f990, %f991;
	@!%p85 bra 	$Lt_89_239874;
	.loc	22	433	0
	mov.f32 	%f992, 0f3f800000;   	// 1
	mov.f32 	%f993, 0f3f800000;   	// 1
	sub.ftz.f32 	%f994, %f993, %f759;
	add.ftz.f32 	%f995, %f990, %f990;
	div.approx.ftz.f32 	%f996, %f994, %f995;
	sub.ftz.f32 	%f997, %f992, %f996;
	mov.f32 	%f998, 0f00000000;   	// 0
	max.ftz.f32 	%f999, %f997, %f998;
	mov.f32 	%f1000, 0f3f800000;  	// 1
	min.ftz.f32 	%f1001, %f999, %f1000;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_46;
$Lt_89_239874:
	.loc	22	437	0
	mov.f32 	%f1002, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1003, %f1002, %f990;
	add.ftz.f32 	%f1004, %f1003, %f1003;
	div.approx.ftz.f32 	%f1005, %f759, %f1004;
	mov.f32 	%f1006, 0f00000000;  	// 0
	max.ftz.f32 	%f1007, %f1005, %f1006;
	mov.f32 	%f1008, 0f3f800000;  	// 1
	min.ftz.f32 	%f1001, %f1007, %f1008;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_46:
	// Shared factors, computed once and reused by all three channels:
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	.loc	22	480	0
	mov.f32 	%f1009, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1009, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1010, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1011, %f172, %f185;
	sub.ftz.f32 	%f189, %f1010, %f1011;
	mul.ftz.f32 	%f1012, %f1001, %f15;
	fma.rn.ftz.f32 	%f1013, %f2, %f184, %f1012;
	mul.ftz.f32 	%f1014, %f186, %f1013;
	fma.rn.ftz.f32 	%f984, %f12, %f189, %f1014;
	// Channel 2 (%f13 with %f4): a = %f787, b = %f1020, v = %f1031.
	.loc	22	431	0
	mov.f32 	%f1015, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f13, %f1015;
	mov.f32 	%f1016, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1017, %f4, %f1016;
	mov.f32 	%f1018, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1018;
	mov.f32 	%f1019, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1020, %f1017, %f1019;
	mov.f32 	%f1021, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p86, %f1020, %f1021;
	@!%p86 bra 	$Lt_89_240130;
	.loc	22	433	0
	mov.f32 	%f1022, 0f3f800000;  	// 1
	mov.f32 	%f1023, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1024, %f1023, %f787;
	add.ftz.f32 	%f1025, %f1020, %f1020;
	div.approx.ftz.f32 	%f1026, %f1024, %f1025;
	sub.ftz.f32 	%f1027, %f1022, %f1026;
	mov.f32 	%f1028, 0f00000000;  	// 0
	max.ftz.f32 	%f1029, %f1027, %f1028;
	mov.f32 	%f1030, 0f3f800000;  	// 1
	min.ftz.f32 	%f1031, %f1029, %f1030;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_44;
$Lt_89_240130:
	.loc	22	437	0
	mov.f32 	%f1032, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1033, %f1032, %f1020;
	add.ftz.f32 	%f1034, %f1033, %f1033;
	div.approx.ftz.f32 	%f1035, %f787, %f1034;
	mov.f32 	%f1036, 0f00000000;  	// 0
	max.ftz.f32 	%f1037, %f1035, %f1036;
	mov.f32 	%f1038, 0f3f800000;  	// 1
	min.ftz.f32 	%f1031, %f1037, %f1038;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_44:
	.loc	22	480	0
	mul.ftz.f32 	%f1039, %f1031, %f15;
	fma.rn.ftz.f32 	%f1040, %f4, %f184, %f1039;
	mul.ftz.f32 	%f1041, %f186, %f1040;
	fma.rn.ftz.f32 	%f983, %f13, %f189, %f1041;
	// Channel 3 (%f14 with %f6): a = %f812, b = %f1047, v = %f1058.
	.loc	22	431	0
	mov.f32 	%f1042, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f14, %f1042;
	mov.f32 	%f1043, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1044, %f6, %f1043;
	mov.f32 	%f1045, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1045;
	mov.f32 	%f1046, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1047, %f1044, %f1046;
	mov.f32 	%f1048, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p87, %f1047, %f1048;
	@!%p87 bra 	$Lt_89_240386;
	.loc	22	433	0
	mov.f32 	%f1049, 0f3f800000;  	// 1
	mov.f32 	%f1050, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1051, %f1050, %f812;
	add.ftz.f32 	%f1052, %f1047, %f1047;
	div.approx.ftz.f32 	%f1053, %f1051, %f1052;
	sub.ftz.f32 	%f1054, %f1049, %f1053;
	mov.f32 	%f1055, 0f00000000;  	// 0
	max.ftz.f32 	%f1056, %f1054, %f1055;
	mov.f32 	%f1057, 0f3f800000;  	// 1
	min.ftz.f32 	%f1058, %f1056, %f1057;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_42;
$Lt_89_240386:
	.loc	22	437	0
	mov.f32 	%f1059, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1060, %f1059, %f1047;
	add.ftz.f32 	%f1061, %f1060, %f1060;
	div.approx.ftz.f32 	%f1062, %f812, %f1061;
	mov.f32 	%f1063, 0f00000000;  	// 0
	max.ftz.f32 	%f1064, %f1062, %f1063;
	mov.f32 	%f1065, 0f3f800000;  	// 1
	min.ftz.f32 	%f1058, %f1064, %f1065;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_42:
	.loc	22	480	0
	mul.ftz.f32 	%f1066, %f1058, %f15;
	fma.rn.ftz.f32 	%f1067, %f6, %f184, %f1066;
	mul.ftz.f32 	%f1068, %f186, %f1067;
	fma.rn.ftz.f32 	%f982, %f14, %f189, %f1068;
$Lt_89_290050:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	205	0
	mov.f32 	%f2, %f984;
	mov.f32 	%f4, %f983;
	mov.f32 	%f6, %f982;
	mov.f32 	%f9, %f978;
	bra.uni 	$Lt_89_311554;
$Lt_89_4354:
	// Case block for source line 22:481.
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c,0,1);  b = clamp(d,0,1)
	//       v = 2b + a - 1                    // not clamped afterwards in this block
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*v )
	.loc	22	481	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1069, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1069;
	mov.f32 	%f1070, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f1071, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1072, %f175, %f1071;
	mov.f32 	%f1073, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p88, %f1072, %f1073;
	@!%p88 bra 	$Lt_89_290818;
	mov.f32 	%f1074, 0f00000000;  	// 0
	mov.f32 	%f1075, 0f00000000;  	// 0
	mov.f32 	%f1076, 0f00000000;  	// 0
	mov.f32 	%f1070, 0f00000000;  	// 0
	bra.uni 	$Lt_89_290562;
$Lt_89_290818:
	// Channel 1 (%f12 with %f2), interleaved with the shared factors
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	mov.f32 	%f1077, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1077, %f15;
	mov.f32 	%f1078, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f2, %f1078;
	mov.f32 	%f1079, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1079;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1080, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1081, %f172, %f185;
	sub.ftz.f32 	%f189, %f1080, %f1081;
	add.ftz.f32 	%f1082, %f761, %f761;
	mov.f32 	%f1083, 0f00000000;  	// 0
	max.ftz.f32 	%f1084, %f12, %f1083;
	mov.f32 	%f1085, 0f3f800000;  	// 1
	min.ftz.f32 	%f1086, %f1084, %f1085;
	add.ftz.f32 	%f1087, %f1082, %f1086;
	mov.f32 	%f1088, 0fbf800000;  	// -1
	add.ftz.f32 	%f1089, %f1087, %f1088;
	mul.ftz.f32 	%f1090, %f15, %f1089;
	fma.rn.ftz.f32 	%f1091, %f2, %f184, %f1090;
	mul.ftz.f32 	%f1092, %f186, %f1091;
	fma.rn.ftz.f32 	%f1076, %f12, %f189, %f1092;
	// Channel 2 (%f13 with %f4): b = %f789, a = %f1099, v = %f1102.
	mov.f32 	%f1093, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f4, %f1093;
	mov.f32 	%f1094, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1094;
	add.ftz.f32 	%f1095, %f789, %f789;
	mov.f32 	%f1096, 0f00000000;  	// 0
	max.ftz.f32 	%f1097, %f13, %f1096;
	mov.f32 	%f1098, 0f3f800000;  	// 1
	min.ftz.f32 	%f1099, %f1097, %f1098;
	add.ftz.f32 	%f1100, %f1095, %f1099;
	mov.f32 	%f1101, 0fbf800000;  	// -1
	add.ftz.f32 	%f1102, %f1100, %f1101;
	mul.ftz.f32 	%f1103, %f15, %f1102;
	fma.rn.ftz.f32 	%f1104, %f4, %f184, %f1103;
	mul.ftz.f32 	%f1105, %f186, %f1104;
	fma.rn.ftz.f32 	%f1075, %f13, %f189, %f1105;
	// Channel 3 (%f14 with %f6): b = %f814, a = %f1112, v = %f1115.
	mov.f32 	%f1106, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f6, %f1106;
	mov.f32 	%f1107, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1107;
	add.ftz.f32 	%f1108, %f814, %f814;
	mov.f32 	%f1109, 0f00000000;  	// 0
	max.ftz.f32 	%f1110, %f14, %f1109;
	mov.f32 	%f1111, 0f3f800000;  	// 1
	min.ftz.f32 	%f1112, %f1110, %f1111;
	add.ftz.f32 	%f1113, %f1108, %f1112;
	mov.f32 	%f1114, 0fbf800000;  	// -1
	add.ftz.f32 	%f1115, %f1113, %f1114;
	mul.ftz.f32 	%f1116, %f15, %f1115;
	fma.rn.ftz.f32 	%f1117, %f6, %f184, %f1116;
	mul.ftz.f32 	%f1118, %f186, %f1117;
	fma.rn.ftz.f32 	%f1074, %f14, %f189, %f1118;
$Lt_89_290562:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	206	0
	mov.f32 	%f2, %f1076;
	mov.f32 	%f4, %f1075;
	mov.f32 	%f6, %f1074;
	mov.f32 	%f9, %f1070;
	bra.uni 	$Lt_89_311554;
$Lt_89_4610:
	// Case block for source line 22:482 (inlined helper at 22:450..460).
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6):
	//       a = clamp(c,0,1);  b = clamp(d,0,1)
	//       if      (2b - 1 > a)  v = 2b - 1          // 22:452
	//       else if (2b     < a)  v = 2b              // 22:456
	//       else                  v = a               // 22:460
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*v )
	.loc	22	482	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1119, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1119;
	mov.f32 	%f1120, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f1121, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1122, %f175, %f1121;
	mov.f32 	%f1123, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p89, %f1122, %f1123;
	@!%p89 bra 	$Lt_89_291330;
	mov.f32 	%f1124, 0f00000000;  	// 0
	mov.f32 	%f1125, 0f00000000;  	// 0
	mov.f32 	%f1126, 0f00000000;  	// 0
	mov.f32 	%f1120, 0f00000000;  	// 0
	bra.uni 	$Lt_89_291074;
$Lt_89_291330:
	// Channel 1 (%f12 with %f2): a = %f759, 2b = %f1131, 2b-1 = %f1133, v = %f1134.
	.loc	22	450	0
	mov.f32 	%f1127, 0f00000000;  	// 0
	max.ftz.f32 	%f755, %f12, %f1127;
	mov.f32 	%f1128, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f2, %f1128;
	mov.f32 	%f1129, 0f3f800000;  	// 1
	min.ftz.f32 	%f759, %f755, %f1129;
	mov.f32 	%f1130, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1130;
	add.ftz.f32 	%f1131, %f761, %f761;
	mov.f32 	%f1132, 0fbf800000;  	// -1
	add.ftz.f32 	%f1133, %f1131, %f1132;
	setp.gt.ftz.f32 	%p90, %f1133, %f759;
	@!%p90 bra 	$Lt_89_241154;
	.loc	22	452	0
	mov.f32 	%f1134, %f1133;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_40;
$Lt_89_241154:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p91, %f1131, %f759;
	@!%p91 bra 	$Lt_89_241410;
	.loc	22	456	0
	mov.f32 	%f1134, %f1131;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_40;
$Lt_89_241410:
	.loc	22	460	0
	mov.f32 	%f1134, %f759;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_40:
	// Shared factors, computed once and reused by all three channels:
	//   %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	.loc	22	482	0
	mov.f32 	%f1135, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1135, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1136, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1137, %f172, %f185;
	sub.ftz.f32 	%f189, %f1136, %f1137;
	mul.ftz.f32 	%f1138, %f1134, %f15;
	fma.rn.ftz.f32 	%f1139, %f2, %f184, %f1138;
	mul.ftz.f32 	%f1140, %f186, %f1139;
	fma.rn.ftz.f32 	%f1126, %f12, %f189, %f1140;
	// Channel 2 (%f13 with %f4): a = %f787, 2b = %f1145, 2b-1 = %f1147, v = %f1148.
	.loc	22	450	0
	mov.f32 	%f1141, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f13, %f1141;
	mov.f32 	%f1142, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f4, %f1142;
	mov.f32 	%f1143, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1143;
	mov.f32 	%f1144, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1144;
	add.ftz.f32 	%f1145, %f789, %f789;
	mov.f32 	%f1146, 0fbf800000;  	// -1
	add.ftz.f32 	%f1147, %f1145, %f1146;
	setp.gt.ftz.f32 	%p92, %f1147, %f787;
	@!%p92 bra 	$Lt_89_241666;
	.loc	22	452	0
	mov.f32 	%f1148, %f1147;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_38;
$Lt_89_241666:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p93, %f1145, %f787;
	@!%p93 bra 	$Lt_89_241922;
	.loc	22	456	0
	mov.f32 	%f1148, %f1145;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_38;
$Lt_89_241922:
	.loc	22	460	0
	mov.f32 	%f1148, %f787;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_38:
	.loc	22	482	0
	mul.ftz.f32 	%f1149, %f1148, %f15;
	fma.rn.ftz.f32 	%f1150, %f4, %f184, %f1149;
	mul.ftz.f32 	%f1151, %f186, %f1150;
	fma.rn.ftz.f32 	%f1125, %f13, %f189, %f1151;
	// Channel 3 (%f14 with %f6): a = %f812, 2b = %f1156, 2b-1 = %f1158, v = %f1159.
	.loc	22	450	0
	mov.f32 	%f1152, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f14, %f1152;
	mov.f32 	%f1153, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f6, %f1153;
	mov.f32 	%f1154, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1154;
	mov.f32 	%f1155, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1155;
	add.ftz.f32 	%f1156, %f814, %f814;
	mov.f32 	%f1157, 0fbf800000;  	// -1
	add.ftz.f32 	%f1158, %f1156, %f1157;
	setp.gt.ftz.f32 	%p94, %f1158, %f812;
	@!%p94 bra 	$Lt_89_242178;
	.loc	22	452	0
	mov.f32 	%f1159, %f1158;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_36;
$Lt_89_242178:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p95, %f1156, %f812;
	@!%p95 bra 	$Lt_89_242434;
	.loc	22	456	0
	mov.f32 	%f1159, %f1156;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__266_36;
$Lt_89_242434:
	.loc	22	460	0
	mov.f32 	%f1159, %f812;
$LDWendi__Z5ClampIfET_S0_S0_S0__266_36:
	.loc	22	482	0
	mul.ftz.f32 	%f1160, %f1159, %f15;
	fma.rn.ftz.f32 	%f1161, %f6, %f184, %f1160;
	mul.ftz.f32 	%f1162, %f186, %f1161;
	fma.rn.ftz.f32 	%f1124, %f14, %f189, %f1162;
$Lt_89_291074:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	207	0
	mov.f32 	%f2, %f1126;
	mov.f32 	%f4, %f1125;
	mov.f32 	%f6, %f1124;
	mov.f32 	%f9, %f1120;
	bra.uni 	$Lt_89_311554;
$Lt_89_4866:
	// Case block for source line 22:483.
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6), inputs used unclamped:
	//       v = (d < 1 - c) ? 0 : 1
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*v )
	.loc	22	483	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1163, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1163;
	mov.f32 	%f1164, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f1165, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1166, %f175, %f1165;
	mov.f32 	%f1167, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p96, %f1166, %f1167;
	@!%p96 bra 	$Lt_89_291842;
	mov.f32 	%f1168, 0f00000000;  	// 0
	mov.f32 	%f1169, 0f00000000;  	// 0
	mov.f32 	%f1170, 0f00000000;  	// 0
	mov.f32 	%f1164, 0f00000000;  	// 0
	bra.uni 	$Lt_89_291586;
$Lt_89_291842:
	// Shared factors: %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	mov.f32 	%f1171, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1171, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1172, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1173, %f172, %f185;
	sub.ftz.f32 	%f189, %f1172, %f1173;
	// Channel 1 (%f12 with %f2): selp yields 0 when the predicate is true, otherwise 1.
	mov.f32 	%f1174, 0f00000000;  	// 0
	mov.f32 	%f1175, 0f3f800000;  	// 1
	mov.f32 	%f1176, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1177, %f1176, %f12;
	setp.lt.ftz.f32 	%p97, %f2, %f1177;
	selp.f32 	%f1178, %f1174, %f1175, %p97;
	mul.ftz.f32 	%f1179, %f1178, %f15;
	fma.rn.ftz.f32 	%f1180, %f2, %f184, %f1179;
	mul.ftz.f32 	%f1181, %f186, %f1180;
	fma.rn.ftz.f32 	%f1170, %f12, %f189, %f1181;
	// Channel 2 (%f13 with %f4).
	mov.f32 	%f1182, 0f00000000;  	// 0
	mov.f32 	%f1183, 0f3f800000;  	// 1
	mov.f32 	%f1184, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1185, %f1184, %f13;
	setp.lt.ftz.f32 	%p98, %f4, %f1185;
	selp.f32 	%f1186, %f1182, %f1183, %p98;
	mul.ftz.f32 	%f1187, %f1186, %f15;
	fma.rn.ftz.f32 	%f1188, %f4, %f184, %f1187;
	mul.ftz.f32 	%f1189, %f186, %f1188;
	fma.rn.ftz.f32 	%f1169, %f13, %f189, %f1189;
	// Channel 3 (%f14 with %f6).
	mov.f32 	%f1190, 0f00000000;  	// 0
	mov.f32 	%f1191, 0f3f800000;  	// 1
	mov.f32 	%f1192, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1193, %f1192, %f14;
	setp.lt.ftz.f32 	%p99, %f6, %f1193;
	selp.f32 	%f1194, %f1190, %f1191, %p99;
	mul.ftz.f32 	%f1195, %f1194, %f15;
	fma.rn.ftz.f32 	%f1196, %f6, %f184, %f1195;
	mul.ftz.f32 	%f1197, %f186, %f1196;
	fma.rn.ftz.f32 	%f1168, %f14, %f189, %f1197;
$Lt_89_291586:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	208	0
	mov.f32 	%f2, %f1170;
	mov.f32 	%f4, %f1169;
	mov.f32 	%f6, %f1168;
	mov.f32 	%f9, %f1164;
	bra.uni 	$Lt_89_311554;
$Lt_89_5122:
	// Case block for source line 22:484.
	// Reads  : %f2,%f4,%f6,%f9 and %f12,%f13,%f14,%f15 (two 4-component values), %f11 (scalar).
	// Writes : %f2,%f4,%f6,%f9, then jumps to $Lt_89_311554 (label is outside this view).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are inferred from the
	//   arithmetic only - confirm against the original source.
	//   w   = %f11 * %f9
	//   out = w + %f15 - w*%f15
	//   if (out - 8e-6 <= 0)  -> all four results are 0
	//   else, per channel (c = %f12|%f13|%f14, d = %f2|%f4|%f6), inputs used unclamped:
	//       v = |d - c|
	//       k = w * rcp.approx(out)
	//       result = c*(1-k) + k*( d*(1-%f15) + %f15*v )
	.loc	22	484	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1198, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1198;
	mov.f32 	%f1199, %f175;
	// Guard: skip the reciprocal below when out is at or below the 8e-6 threshold.
	mov.f32 	%f1200, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1201, %f175, %f1200;
	mov.f32 	%f1202, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p100, %f1201, %f1202;
	@!%p100 bra 	$Lt_89_292354;
	mov.f32 	%f1203, 0f00000000;  	// 0
	mov.f32 	%f1204, 0f00000000;  	// 0
	mov.f32 	%f1205, 0f00000000;  	// 0
	mov.f32 	%f1199, 0f00000000;  	// 0
	bra.uni 	$Lt_89_292098;
$Lt_89_292354:
	// Shared factors: %f184 = 1 - %f15, %f185 = rcp.approx(out), %f186 = k, %f189 = 1 - k.
	mov.f32 	%f1206, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1206, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1207, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1208, %f172, %f185;
	sub.ftz.f32 	%f189, %f1207, %f1208;
	// Channel 1 (%f12 with %f2).
	sub.ftz.f32 	%f1209, %f2, %f12;
	abs.ftz.f32 	%f1210, %f1209;
	mul.ftz.f32 	%f1211, %f15, %f1210;
	fma.rn.ftz.f32 	%f1212, %f2, %f184, %f1211;
	mul.ftz.f32 	%f1213, %f186, %f1212;
	fma.rn.ftz.f32 	%f1205, %f12, %f189, %f1213;
	// Channel 2 (%f13 with %f4).
	sub.ftz.f32 	%f1214, %f4, %f13;
	abs.ftz.f32 	%f1215, %f1214;
	mul.ftz.f32 	%f1216, %f15, %f1215;
	fma.rn.ftz.f32 	%f1217, %f4, %f184, %f1216;
	mul.ftz.f32 	%f1218, %f186, %f1217;
	fma.rn.ftz.f32 	%f1204, %f13, %f189, %f1218;
	// Channel 3 (%f14 with %f6).
	sub.ftz.f32 	%f1219, %f6, %f14;
	abs.ftz.f32 	%f1220, %f1219;
	mul.ftz.f32 	%f1221, %f15, %f1220;
	fma.rn.ftz.f32 	%f1222, %f6, %f184, %f1221;
	mul.ftz.f32 	%f1223, %f186, %f1222;
	fma.rn.ftz.f32 	%f1203, %f14, %f189, %f1223;
$Lt_89_292098:
	// Join of the zero path and the computed path: publish the four results.
	.loc	6	209	0
	mov.f32 	%f2, %f1205;
	mov.f32 	%f4, %f1204;
	mov.f32 	%f6, %f1203;
	mov.f32 	%f9, %f1199;
	bra.uni 	$Lt_89_311554;
$Lt_89_5378:
	// Case body: recomputes %f2, %f4, %f6 and %f9 from their current values
	// and from %f11..%f15, then jumps to the shared join $Lt_89_311554.
	// Per-channel mixing term: with a = clamp01(%f12) and b = clamp01(%f2),
	// B = clamp01(a + b - 2*a*b) (likewise %f13/%f4 and %f14/%f6).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are
	// established outside this chunk - confirm before relying on them.
	.loc	22	485	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1224, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1224;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1225, %f175;
	mov.f32 	%f1226, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1227, %f175, %f1226;
	mov.f32 	%f1228, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p101, %f1227, %f1228;
	@!%p101 bra 	$Lt_89_292866;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1229, 0f00000000;  	// 0
	mov.f32 	%f1230, 0f00000000;  	// 0
	mov.f32 	%f1231, 0f00000000;  	// 0
	mov.f32 	%f1225, 0f00000000;  	// 0
	bra.uni 	$Lt_89_292610;
$Lt_89_292866:
	// %f184 = 1 - %f15
	mov.f32 	%f1232, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1232, %f15;
	// Channel 1 operands: %f759 = clamp01(%f12), %f761 = clamp01(%f2)
	mov.f32 	%f1233, 0f00000000;  	// 0
	max.ftz.f32 	%f755, %f12, %f1233;
	mov.f32 	%f1234, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f2, %f1234;
	mov.f32 	%f1235, 0f3f800000;  	// 1
	min.ftz.f32 	%f759, %f755, %f1235;
	mov.f32 	%f1236, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1236;
	// %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1237, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1238, %f172, %f185;
	sub.ftz.f32 	%f189, %f1237, %f1238;
	// B1 = clamp01(a + b - a*(b+b))
	add.ftz.f32 	%f1239, %f761, %f759;
	add.ftz.f32 	%f1240, %f761, %f761;
	mul.ftz.f32 	%f1241, %f759, %f1240;
	sub.ftz.f32 	%f1242, %f1239, %f1241;
	mov.f32 	%f1243, 0f00000000;  	// 0
	max.ftz.f32 	%f1244, %f1242, %f1243;
	mov.f32 	%f1245, 0f3f800000;  	// 1
	min.ftz.f32 	%f1246, %f1244, %f1245;
	// %f1231 = %f12*%f189 + %f186*(%f2*%f184 + %f15*B1)
	mul.ftz.f32 	%f1247, %f15, %f1246;
	fma.rn.ftz.f32 	%f1248, %f2, %f184, %f1247;
	mul.ftz.f32 	%f1249, %f186, %f1248;
	fma.rn.ftz.f32 	%f1231, %f12, %f189, %f1249;
	// Channel 2: same sequence on %f13 / %f4 -> %f1230
	mov.f32 	%f1250, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f13, %f1250;
	mov.f32 	%f1251, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f4, %f1251;
	mov.f32 	%f1252, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1252;
	mov.f32 	%f1253, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1253;
	add.ftz.f32 	%f1254, %f789, %f787;
	add.ftz.f32 	%f1255, %f789, %f789;
	mul.ftz.f32 	%f1256, %f787, %f1255;
	sub.ftz.f32 	%f1257, %f1254, %f1256;
	mov.f32 	%f1258, 0f00000000;  	// 0
	max.ftz.f32 	%f1259, %f1257, %f1258;
	mov.f32 	%f1260, 0f3f800000;  	// 1
	min.ftz.f32 	%f1261, %f1259, %f1260;
	mul.ftz.f32 	%f1262, %f15, %f1261;
	fma.rn.ftz.f32 	%f1263, %f4, %f184, %f1262;
	mul.ftz.f32 	%f1264, %f186, %f1263;
	fma.rn.ftz.f32 	%f1230, %f13, %f189, %f1264;
	// Channel 3: same sequence on %f14 / %f6 -> %f1229
	mov.f32 	%f1265, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f14, %f1265;
	mov.f32 	%f1266, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f6, %f1266;
	mov.f32 	%f1267, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1267;
	mov.f32 	%f1268, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1268;
	add.ftz.f32 	%f1269, %f814, %f812;
	add.ftz.f32 	%f1270, %f814, %f814;
	mul.ftz.f32 	%f1271, %f812, %f1270;
	sub.ftz.f32 	%f1272, %f1269, %f1271;
	mov.f32 	%f1273, 0f00000000;  	// 0
	max.ftz.f32 	%f1274, %f1272, %f1273;
	mov.f32 	%f1275, 0f3f800000;  	// 1
	min.ftz.f32 	%f1276, %f1274, %f1275;
	mul.ftz.f32 	%f1277, %f15, %f1276;
	fma.rn.ftz.f32 	%f1278, %f6, %f184, %f1277;
	mul.ftz.f32 	%f1279, %f186, %f1278;
	fma.rn.ftz.f32 	%f1229, %f14, %f189, %f1279;
$Lt_89_292610:
	// Commit the results to the running registers and leave the case.
	.loc	6	210	0
	mov.f32 	%f2, %f1231;
	mov.f32 	%f4, %f1230;
	mov.f32 	%f6, %f1229;
	mov.f32 	%f9, %f1225;
	bra.uni 	$Lt_89_311554;
$Lt_89_5634:
	// Case body: recomputes %f2, %f4, %f6 and %f9 from their current values
	// and from %f11..%f15, then jumps to the shared join $Lt_89_311554.
	// Per-channel mixing term: B = clamp01(clamp01(%f12) - clamp01(%f2))
	// (likewise %f13/%f4 and %f14/%f6).
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are
	// established outside this chunk - confirm before relying on them.
	.loc	22	486	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1280, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1280;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1281, %f175;
	mov.f32 	%f1282, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1283, %f175, %f1282;
	mov.f32 	%f1284, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p102, %f1283, %f1284;
	@!%p102 bra 	$Lt_89_293378;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1285, 0f00000000;  	// 0
	mov.f32 	%f1286, 0f00000000;  	// 0
	mov.f32 	%f1287, 0f00000000;  	// 0
	mov.f32 	%f1281, 0f00000000;  	// 0
	bra.uni 	$Lt_89_293122;
$Lt_89_293378:
	// %f184 = 1 - %f15 ; %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	mov.f32 	%f1288, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1288, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1289, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1290, %f172, %f185;
	sub.ftz.f32 	%f189, %f1289, %f1290;
	// Channel 1: B1 = clamp01(clamp01(%f12) - clamp01(%f2))
	mov.f32 	%f1291, 0f00000000;  	// 0
	max.ftz.f32 	%f1292, %f12, %f1291;
	mov.f32 	%f1293, 0f3f800000;  	// 1
	min.ftz.f32 	%f1294, %f1292, %f1293;
	mov.f32 	%f1295, 0f00000000;  	// 0
	max.ftz.f32 	%f1296, %f2, %f1295;
	mov.f32 	%f1297, 0f3f800000;  	// 1
	min.ftz.f32 	%f1298, %f1296, %f1297;
	sub.ftz.f32 	%f1299, %f1294, %f1298;
	mov.f32 	%f1300, 0f00000000;  	// 0
	max.ftz.f32 	%f1301, %f1299, %f1300;
	mov.f32 	%f1302, 0f3f800000;  	// 1
	min.ftz.f32 	%f1303, %f1301, %f1302;
	// %f1287 = %f12*%f189 + %f186*(%f2*%f184 + %f15*B1)
	mul.ftz.f32 	%f1304, %f15, %f1303;
	fma.rn.ftz.f32 	%f1305, %f2, %f184, %f1304;
	mul.ftz.f32 	%f1306, %f186, %f1305;
	fma.rn.ftz.f32 	%f1287, %f12, %f189, %f1306;
	// Channel 2: same sequence on %f13 / %f4 -> %f1286
	mov.f32 	%f1307, 0f00000000;  	// 0
	max.ftz.f32 	%f1308, %f13, %f1307;
	mov.f32 	%f1309, 0f3f800000;  	// 1
	min.ftz.f32 	%f1310, %f1308, %f1309;
	mov.f32 	%f1311, 0f00000000;  	// 0
	max.ftz.f32 	%f1312, %f4, %f1311;
	mov.f32 	%f1313, 0f3f800000;  	// 1
	min.ftz.f32 	%f1314, %f1312, %f1313;
	sub.ftz.f32 	%f1315, %f1310, %f1314;
	mov.f32 	%f1316, 0f00000000;  	// 0
	max.ftz.f32 	%f1317, %f1315, %f1316;
	mov.f32 	%f1318, 0f3f800000;  	// 1
	min.ftz.f32 	%f1319, %f1317, %f1318;
	mul.ftz.f32 	%f1320, %f15, %f1319;
	fma.rn.ftz.f32 	%f1321, %f4, %f184, %f1320;
	mul.ftz.f32 	%f1322, %f186, %f1321;
	fma.rn.ftz.f32 	%f1286, %f13, %f189, %f1322;
	// Channel 3: same sequence on %f14 / %f6 -> %f1285
	mov.f32 	%f1323, 0f00000000;  	// 0
	max.ftz.f32 	%f1324, %f14, %f1323;
	mov.f32 	%f1325, 0f3f800000;  	// 1
	min.ftz.f32 	%f1326, %f1324, %f1325;
	mov.f32 	%f1327, 0f00000000;  	// 0
	max.ftz.f32 	%f1328, %f6, %f1327;
	mov.f32 	%f1329, 0f3f800000;  	// 1
	min.ftz.f32 	%f1330, %f1328, %f1329;
	sub.ftz.f32 	%f1331, %f1326, %f1330;
	mov.f32 	%f1332, 0f00000000;  	// 0
	max.ftz.f32 	%f1333, %f1331, %f1332;
	mov.f32 	%f1334, 0f3f800000;  	// 1
	min.ftz.f32 	%f1335, %f1333, %f1334;
	mul.ftz.f32 	%f1336, %f15, %f1335;
	fma.rn.ftz.f32 	%f1337, %f6, %f184, %f1336;
	mul.ftz.f32 	%f1338, %f186, %f1337;
	fma.rn.ftz.f32 	%f1285, %f14, %f189, %f1338;
$Lt_89_293122:
	// Commit the results to the running registers and leave the case.
	.loc	6	211	0
	mov.f32 	%f2, %f1287;
	mov.f32 	%f4, %f1286;
	mov.f32 	%f6, %f1285;
	mov.f32 	%f9, %f1281;
	bra.uni 	$Lt_89_311554;
$Lt_89_5890:
	// Case body: recomputes %f2, %f4, %f6 and %f9 from their current values
	// and from %f11..%f15, then jumps to the shared join $Lt_89_311554.
	// Per-channel mixing term:
	//   B = clamp01( clamp01(%f12) / min(max(%f2, 1e-7), 1) )
	// (likewise %f13/%f4 and %f14/%f6). The 1e-7 floor keeps the divisor
	// strictly positive.
	// NOTE(review): register roles (colour channels vs. alpha/opacity) are
	// established outside this chunk - confirm before relying on them.
	.loc	22	487	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1339, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1339;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1340, %f175;
	mov.f32 	%f1341, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1342, %f175, %f1341;
	mov.f32 	%f1343, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p103, %f1342, %f1343;
	@!%p103 bra 	$Lt_89_293890;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1344, 0f00000000;  	// 0
	mov.f32 	%f1345, 0f00000000;  	// 0
	mov.f32 	%f1346, 0f00000000;  	// 0
	mov.f32 	%f1340, 0f00000000;  	// 0
	bra.uni 	$Lt_89_293634;
$Lt_89_293890:
	// %f184 = 1 - %f15 ; %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	mov.f32 	%f1347, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1347, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1348, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1349, %f172, %f185;
	sub.ftz.f32 	%f189, %f1348, %f1349;
	// Channel 1: numerator clamp01(%f12), divisor clamp(%f2, 1e-7, 1)
	mov.f32 	%f1350, 0f00000000;  	// 0
	max.ftz.f32 	%f1351, %f12, %f1350;
	mov.f32 	%f1352, 0f3f800000;  	// 1
	min.ftz.f32 	%f1353, %f1351, %f1352;
	mov.f32 	%f1354, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1355, %f2, %f1354;
	mov.f32 	%f1356, 0f3f800000;  	// 1
	min.ftz.f32 	%f1357, %f1355, %f1356;
	// Approximate divide, then clamp the quotient to [0,1] -> B1
	div.approx.ftz.f32 	%f1358, %f1353, %f1357;
	mov.f32 	%f1359, 0f00000000;  	// 0
	max.ftz.f32 	%f1360, %f1358, %f1359;
	mov.f32 	%f1361, 0f3f800000;  	// 1
	min.ftz.f32 	%f1362, %f1360, %f1361;
	// %f1346 = %f12*%f189 + %f186*(%f2*%f184 + %f15*B1)
	mul.ftz.f32 	%f1363, %f15, %f1362;
	fma.rn.ftz.f32 	%f1364, %f2, %f184, %f1363;
	mul.ftz.f32 	%f1365, %f186, %f1364;
	fma.rn.ftz.f32 	%f1346, %f12, %f189, %f1365;
	// Channel 2: same sequence on %f13 / %f4 -> %f1345
	mov.f32 	%f1366, 0f00000000;  	// 0
	max.ftz.f32 	%f1367, %f13, %f1366;
	mov.f32 	%f1368, 0f3f800000;  	// 1
	min.ftz.f32 	%f1369, %f1367, %f1368;
	mov.f32 	%f1370, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1371, %f4, %f1370;
	mov.f32 	%f1372, 0f3f800000;  	// 1
	min.ftz.f32 	%f1373, %f1371, %f1372;
	div.approx.ftz.f32 	%f1374, %f1369, %f1373;
	mov.f32 	%f1375, 0f00000000;  	// 0
	max.ftz.f32 	%f1376, %f1374, %f1375;
	mov.f32 	%f1377, 0f3f800000;  	// 1
	min.ftz.f32 	%f1378, %f1376, %f1377;
	mul.ftz.f32 	%f1379, %f15, %f1378;
	fma.rn.ftz.f32 	%f1380, %f4, %f184, %f1379;
	mul.ftz.f32 	%f1381, %f186, %f1380;
	fma.rn.ftz.f32 	%f1345, %f13, %f189, %f1381;
	// Channel 3: same sequence on %f14 / %f6 -> %f1344
	mov.f32 	%f1382, 0f00000000;  	// 0
	max.ftz.f32 	%f1383, %f14, %f1382;
	mov.f32 	%f1384, 0f3f800000;  	// 1
	min.ftz.f32 	%f1385, %f1383, %f1384;
	mov.f32 	%f1386, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1387, %f6, %f1386;
	mov.f32 	%f1388, 0f3f800000;  	// 1
	min.ftz.f32 	%f1389, %f1387, %f1388;
	div.approx.ftz.f32 	%f1390, %f1385, %f1389;
	mov.f32 	%f1391, 0f00000000;  	// 0
	max.ftz.f32 	%f1392, %f1390, %f1391;
	mov.f32 	%f1393, 0f3f800000;  	// 1
	min.ftz.f32 	%f1394, %f1392, %f1393;
	mul.ftz.f32 	%f1395, %f15, %f1394;
	fma.rn.ftz.f32 	%f1396, %f6, %f184, %f1395;
	mul.ftz.f32 	%f1397, %f186, %f1396;
	fma.rn.ftz.f32 	%f1344, %f14, %f189, %f1397;
$Lt_89_293634:
	// Commit the results to the running registers and leave the case.
	.loc	6	212	0
	mov.f32 	%f2, %f1346;
	mov.f32 	%f4, %f1345;
	mov.f32 	%f6, %f1344;
	mov.f32 	%f9, %f1340;
	bra.uni 	$Lt_89_311554;
$Lt_89_6146:
	// Case body (three channels handled jointly). Steps:
	//   1. Build (%f1411,%f1412,%f1416) with the min/mid/max ordering of
	//      (%f2,%f4,%f6) and a max-min spread S taken from (%f12,%f13,%f14).
	//   2. Shift it so its weighted luma equals that of (%f12,%f13,%f14).
	//   3. Pull the result back into [0,1] around its luma.
	//   4. Composite with %f12..%f15 and jump to $Lt_89_311554.
	// NOTE(review): this has the shape of the W3C/PDF "Hue" blend,
	// SetLum(SetSat(a, Sat(b)), Lum(b)) - confirm against the CUDA source.
	.loc	22	154	0
	// %p104 = (%f4 > %f2); %f1400 = max(%f2,%f4,%f6); %f1401 = min(%f2,%f4,%f6)
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	// --- min is %f6 ---
	setp.eq.ftz.f32 	%p106, %f1401, %f6;
	@!%p106 bra 	$Lt_89_294402;
	setp.eq.ftz.f32 	%p107, %f1400, %f4;
	@!%p107 bra 	$Lt_89_294914;
	// max is %f4, mid is %f2; the strict test guards the divide below.
	setp.lt.ftz.f32 	%p108, %f6, %f4;
	@!%p108 bra 	$Lt_89_295426;
	.loc	22	161	0
	// S = %f1407 = sat(max(%f12,%f13,%f14) - min(%f12,%f13,%f14))
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1411 = S * (%f2 - %f6) / (%f4 - %f6)
	sub.ftz.f32 	%f1408, %f2, %f6;
	mul.ftz.f32 	%f1409, %f1407, %f1408;
	sub.ftz.f32 	%f1410, %f4, %f6;
	div.approx.ftz.f32 	%f1411, %f1409, %f1410;
	.loc	22	162	0
	// max slot: %f1412 = S
	mov.f32 	%f1412, %f1407;
	bra.uni 	$Lt_89_295682;
$Lt_89_295426:
	.loc	22	166	0
	// max == min: no spread, both slots 0
	mov.f32 	%f1411, 0f00000000;  	// 0
	mov.f32 	%f1412, 0f00000000;  	// 0
	bra.uni 	$Lt_89_295682;
$Lt_89_294914:
	// min is %f6, max is not %f4 (so %f2 is tested as max, %f4 as mid).
	setp.lt.ftz.f32 	%p111, %f6, %f2;
	@!%p111 bra 	$Lt_89_295938;
	.loc	22	173	0
	// S recomputed as above
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1412 = S * (%f4 - %f6) / (%f2 - %f6)
	sub.ftz.f32 	%f1413, %f4, %f6;
	mul.ftz.f32 	%f1414, %f1407, %f1413;
	sub.ftz.f32 	%f1415, %f2, %f6;
	div.approx.ftz.f32 	%f1412, %f1414, %f1415;
	.loc	22	174	0
	// max slot: %f1411 = S
	mov.f32 	%f1411, %f1407;
	bra.uni 	$Lt_89_295682;
$Lt_89_295938:
	.loc	22	178	0
	mov.f32 	%f1411, 0f00000000;  	// 0
	mov.f32 	%f1412, 0f00000000;  	// 0
$Lt_89_295682:
$Lt_89_294658:
	// min slot (paired with %f6) is 0
	mov.f32 	%f1416, 0f00000000;  	// 0
	bra.uni 	$Lt_89_298242;
$Lt_89_294402:
	// --- min is not %f6 ---
	// %p112 = (min == %f4), %p113 = (max == %f6)
	setp.eq.ftz.f32 	%p112, %f1401, %f4;
	setp.eq.ftz.f32 	%p113, %f1400, %f6;
	@!%p113 bra 	$Lt_89_296450;
	@!%p112 bra 	$Lt_89_296962;
	// max is %f6, min is %f4, mid is %f2
	setp.gt.ftz.f32 	%p114, %f6, %f4;
	@!%p114 bra 	$Lt_89_297474;
	.loc	22	191	0
	// S recomputed as above
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1411 = S * (%f2 - %f4) / (%f6 - %f4)
	sub.ftz.f32 	%f1417, %f2, %f4;
	mul.ftz.f32 	%f1418, %f1407, %f1417;
	sub.ftz.f32 	%f1419, %f6, %f4;
	div.approx.ftz.f32 	%f1411, %f1418, %f1419;
	.loc	22	192	0
	// max slot: %f1416 = S
	mov.f32 	%f1416, %f1407;
	bra.uni 	$Lt_89_297218;
$Lt_89_297474:
	.loc	22	196	0
	mov.f32 	%f1411, 0f00000000;  	// 0
	mov.f32 	%f1416, 0f00000000;  	// 0
$Lt_89_297218:
	// min slot (paired with %f4) is 0
	mov.f32 	%f1412, 0f00000000;  	// 0
	bra.uni 	$Lt_89_298242;
$Lt_89_296962:
	// max is %f6, min is neither %f6 nor %f4 (so %f2), mid is %f4
	setp.gt.ftz.f32 	%p115, %f6, %f2;
	@!%p115 bra 	$Lt_89_297986;
	.loc	22	204	0
	// S recomputed as above
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1412 = S * (%f4 - %f2) / (%f6 - %f2)
	sub.ftz.f32 	%f1420, %f4, %f2;
	mul.ftz.f32 	%f1421, %f1407, %f1420;
	sub.ftz.f32 	%f1422, %f6, %f2;
	div.approx.ftz.f32 	%f1412, %f1421, %f1422;
	.loc	22	205	0
	// max slot: %f1416 = S
	mov.f32 	%f1416, %f1407;
	bra.uni 	$Lt_89_297730;
$Lt_89_297986:
	.loc	22	209	0
	mov.f32 	%f1416, 0f00000000;  	// 0
	mov.f32 	%f1412, 0f00000000;  	// 0
$Lt_89_297730:
	.loc	22	211	0
	// min slot (paired with %f2) is 0
	mov.f32 	%f1411, 0f00000000;  	// 0
	bra.uni 	$Lt_89_298242;
$Lt_89_296450:
	// --- max is not %f6 and min is not %f6: %f6 is the mid value ---
	@!%p112 bra 	$Lt_89_298498;
	// min is %f4, max is %f2
	setp.lt.ftz.f32 	%p116, %f4, %f2;
	@!%p116 bra 	$Lt_89_299010;
	.loc	22	220	0
	// S recomputed as above
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1416 = S * (%f6 - %f4) / (%f2 - %f4)
	sub.ftz.f32 	%f1423, %f6, %f4;
	mul.ftz.f32 	%f1424, %f1407, %f1423;
	sub.ftz.f32 	%f1425, %f2, %f4;
	div.approx.ftz.f32 	%f1416, %f1424, %f1425;
	.loc	22	221	0
	// max slot: %f1411 = S
	mov.f32 	%f1411, %f1407;
	bra.uni 	$Lt_89_298754;
$Lt_89_299010:
	.loc	22	225	0
	mov.f32 	%f1411, 0f00000000;  	// 0
	mov.f32 	%f1416, 0f00000000;  	// 0
$Lt_89_298754:
	// min slot (paired with %f4) is 0
	mov.f32 	%f1412, 0f00000000;  	// 0
	bra.uni 	$Lt_89_298242;
$Lt_89_298498:
	// min is %f2, max is %f4; %p104 (%f4 > %f2) was set at the case entry
	@!%p104 bra 	$Lt_89_299522;
	.loc	22	233	0
	// S recomputed as above
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	sub.ftz.f32 	%f1406, %f1403, %f1405;
	cvt.ftz.sat.f32.f32 	%f1407, %f1406;
	// mid slot: %f1416 = S * (%f6 - %f2) / (%f4 - %f2)
	sub.ftz.f32 	%f1426, %f6, %f2;
	mul.ftz.f32 	%f1427, %f1407, %f1426;
	sub.ftz.f32 	%f1428, %f4, %f2;
	div.approx.ftz.f32 	%f1416, %f1427, %f1428;
	.loc	22	234	0
	// max slot: %f1412 = S
	mov.f32 	%f1412, %f1407;
	bra.uni 	$Lt_89_299266;
$Lt_89_299522:
	.loc	22	238	0
	mov.f32 	%f1416, 0f00000000;  	// 0
	mov.f32 	%f1412, 0f00000000;  	// 0
$Lt_89_299266:
	.loc	22	240	0
	// min slot (paired with %f2) is 0
	mov.f32 	%f1411, 0f00000000;  	// 0
$Lt_89_298242:
$Lt_89_296194:
$Lt_89_294146:
	.loc	22	113	0
	// Weighted luma with three constants from kRGB32f_To_601YPbPr:
	//   Y(x1,x2,x3) = [+8]*x1 + [+4]*x2 + [+0]*x3
	// %f1433 = Y(%f12,%f13,%f14), %f1434 = Y(%f1411,%f1412,%f1416).
	// NOTE(review): table name suggests Rec.601 luma weights - confirm.
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1429, %f13, %f501;
	mul.ftz.f32 	%f1430, %f1412, %f501;
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1431, %f500, %f14, %f1429;
	fma.rn.ftz.f32 	%f1432, %f500, %f1416, %f1430;
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1433, %f499, %f12, %f1431;
	fma.rn.ftz.f32 	%f1434, %f499, %f1411, %f1432;
	// d = sat(%f1433) - sat(%f1434); add d to every channel.
	// %f1438/%f1440/%f1442 keep the shifted values, %f1439/%f1441/%f1443
	// are working copies that the clip steps below may overwrite.
	cvt.ftz.sat.f32.f32 	%f1435, %f1433;
	cvt.ftz.sat.f32.f32 	%f1436, %f1434;
	sub.ftz.f32 	%f1437, %f1435, %f1436;
	add.ftz.f32 	%f1438, %f1437, %f1411;
	mov.f32 	%f1439, %f1438;
	add.ftz.f32 	%f1440, %f1437, %f1412;
	mov.f32 	%f1441, %f1440;
	add.ftz.f32 	%f1442, %f1437, %f1416;
	mov.f32 	%f1443, %f1442;
	.loc	22	50	0
	// L = %f1447 = sat(Y(shifted colour))
	mul.ftz.f32 	%f1444, %f1440, %f501;
	fma.rn.ftz.f32 	%f1445, %f500, %f1442, %f1444;
	fma.rn.ftz.f32 	%f1446, %f499, %f1438, %f1445;
	cvt.ftz.sat.f32.f32 	%f1447, %f1446;
	.loc	22	116	0
	// n = %f1449 = min of the shifted channels; clip only if n < 0
	setp.gt.ftz.f32 	%p117, %f1440, %f1438;
	selp.f32 	%f1448, %f1438, %f1440, %p117;
	setp.lt.ftz.f32 	%p118, %f1448, %f1442;
	selp.f32 	%f1449, %f1448, %f1442, %p118;
	mov.f32 	%f1450, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p119, %f1449, %f1450;
	@!%p119 bra 	$Lt_89_299778;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - n)
	sub.ftz.f32 	%f1451, %f1447, %f1449;
	sub.ftz.f32 	%f1452, %f1442, %f1447;
	mul.ftz.f32 	%f1453, %f1447, %f1452;
	div.approx.ftz.f32 	%f1454, %f1453, %f1451;
	add.ftz.f32 	%f1443, %f1447, %f1454;
	.loc	22	120	0
	sub.ftz.f32 	%f1455, %f1440, %f1447;
	mul.ftz.f32 	%f1456, %f1447, %f1455;
	div.approx.ftz.f32 	%f1457, %f1456, %f1451;
	add.ftz.f32 	%f1441, %f1447, %f1457;
	.loc	22	121	0
	sub.ftz.f32 	%f1458, %f1438, %f1447;
	mul.ftz.f32 	%f1459, %f1447, %f1458;
	div.approx.ftz.f32 	%f1460, %f1459, %f1451;
	add.ftz.f32 	%f1439, %f1447, %f1460;
$Lt_89_299778:
	// x = %f1462 = max of the shifted channels (values from before the low
	// clip); clip only if x > 1
	max.ftz.f32 	%f1461, %f1440, %f1438;
	max.ftz.f32 	%f1462, %f1461, %f1442;
	mov.f32 	%f1463, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p120, %f1462, %f1463;
	@!%p120 bra 	$Lt_89_300290;
	.loc	27	529	0
	// High clip, per channel (on the working copies):
	//   c = L + (1 - L) * (c - L) / (x - L)
	mov.f32 	%f1464, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1465, %f1464, %f1447;
	sub.ftz.f32 	%f1466, %f1462, %f1447;
	sub.ftz.f32 	%f1467, %f1443, %f1447;
	mul.ftz.f32 	%f1468, %f1465, %f1467;
	div.approx.ftz.f32 	%f1469, %f1468, %f1466;
	.loc	22	125	0
	add.ftz.f32 	%f1443, %f1469, %f1447;
	.loc	27	529	0
	sub.ftz.f32 	%f1470, %f1441, %f1447;
	mul.ftz.f32 	%f1471, %f1465, %f1470;
	div.approx.ftz.f32 	%f1472, %f1471, %f1466;
	.loc	22	126	0
	add.ftz.f32 	%f1441, %f1472, %f1447;
	.loc	27	529	0
	sub.ftz.f32 	%f1473, %f1439, %f1447;
	mul.ftz.f32 	%f1474, %f1465, %f1473;
	div.approx.ftz.f32 	%f1475, %f1474, %f1466;
	.loc	22	127	0
	add.ftz.f32 	%f1439, %f1475, %f1447;
$Lt_89_300290:
	.loc	22	468	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1476, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1476;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1477, %f175;
	mov.f32 	%f1478, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1479, %f175, %f1478;
	mov.f32 	%f1480, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p121, %f1479, %f1480;
	@!%p121 bra 	$Lt_89_301058;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1481, 0f00000000;  	// 0
	mov.f32 	%f1482, 0f00000000;  	// 0
	mov.f32 	%f1483, 0f00000000;  	// 0
	mov.f32 	%f1477, 0f00000000;  	// 0
	bra.uni 	$Lt_89_300802;
$Lt_89_301058:
	// %f184 = 1 - %f15 ; %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	mov.f32 	%f1484, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1484, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1485, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1486, %f172, %f185;
	sub.ftz.f32 	%f189, %f1485, %f1486;
	// Per channel, with c the clipped colour:
	//   out = %f12 * %f189 + %f186 * (c * %f15 + %f184 * c)
	// NOTE(review): both weights (%f15 and 1-%f15) multiply the same c, so
	// the inner sum is c up to rounding; %f2/%f4/%f6 do not appear in this
	// step - confirm that is intended.
	mul.ftz.f32 	%f1487, %f184, %f1439;
	fma.rn.ftz.f32 	%f1488, %f1439, %f15, %f1487;
	mul.ftz.f32 	%f1489, %f186, %f1488;
	fma.rn.ftz.f32 	%f1483, %f12, %f189, %f1489;
	mul.ftz.f32 	%f1490, %f184, %f1441;
	fma.rn.ftz.f32 	%f1491, %f1441, %f15, %f1490;
	mul.ftz.f32 	%f1492, %f186, %f1491;
	fma.rn.ftz.f32 	%f1482, %f13, %f189, %f1492;
	mul.ftz.f32 	%f1493, %f184, %f1443;
	fma.rn.ftz.f32 	%f1494, %f1443, %f15, %f1493;
	mul.ftz.f32 	%f1495, %f186, %f1494;
	fma.rn.ftz.f32 	%f1481, %f14, %f189, %f1495;
$Lt_89_300802:
	// Commit the results to the running registers and leave the case.
	.loc	6	213	0
	mov.f32 	%f2, %f1483;
	mov.f32 	%f4, %f1482;
	mov.f32 	%f6, %f1481;
	mov.f32 	%f9, %f1477;
	bra.uni 	$Lt_89_311554;
$Lt_89_6402:
	// Case body (three channels handled jointly). Steps:
	//   1. Build (%f1501,%f1502,%f1506) with the min/mid/max ordering of
	//      (%f12,%f13,%f14) and a max-min spread S taken from (%f2,%f4,%f6).
	//   2. Shift it so its weighted luma equals that of (%f12,%f13,%f14).
	//   3. Pull the result back into [0,1] around its luma.
	//   4. Composite with %f12..%f15 and jump to $Lt_89_311554.
	// NOTE(review): this has the shape of the W3C/PDF "Saturation" blend,
	// SetLum(SetSat(b, Sat(a)), Lum(b)) - confirm against the CUDA source.
	.loc	22	154	0
	// %p109 = (%f12 < %f13); %f1403 = max(%f12,%f13,%f14);
	// %f1405 = min(%f12,%f13,%f14)
	max.ftz.f32 	%f1402, %f12, %f13;
	setp.lt.ftz.f32 	%p109, %f12, %f13;
	max.ftz.f32 	%f1403, %f1402, %f14;
	selp.f32 	%f1404, %f12, %f13, %p109;
	setp.lt.ftz.f32 	%p110, %f1404, %f14;
	selp.f32 	%f1405, %f1404, %f14, %p110;
	// --- min is %f14 ---
	setp.eq.ftz.f32 	%p122, %f1405, %f14;
	@!%p122 bra 	$Lt_89_301570;
	setp.eq.ftz.f32 	%p123, %f1403, %f13;
	@!%p123 bra 	$Lt_89_302082;
	// max is %f13, mid is %f12; the strict test guards the divide below.
	setp.gt.ftz.f32 	%p124, %f13, %f14;
	@!%p124 bra 	$Lt_89_302594;
	.loc	22	161	0
	// S = %f1497 = sat(max(%f2,%f4,%f6) - min(%f2,%f4,%f6))
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1501 = S * (%f12 - %f14) / (%f13 - %f14)
	sub.ftz.f32 	%f1498, %f12, %f14;
	mul.ftz.f32 	%f1499, %f1497, %f1498;
	sub.ftz.f32 	%f1500, %f13, %f14;
	div.approx.ftz.f32 	%f1501, %f1499, %f1500;
	.loc	22	162	0
	// max slot: %f1502 = S
	mov.f32 	%f1502, %f1497;
	bra.uni 	$Lt_89_302850;
$Lt_89_302594:
	.loc	22	166	0
	// max == min: no spread, both slots 0
	mov.f32 	%f1501, 0f00000000;  	// 0
	mov.f32 	%f1502, 0f00000000;  	// 0
	bra.uni 	$Lt_89_302850;
$Lt_89_302082:
	// min is %f14, max is not %f13 (so %f12 is tested as max, %f13 as mid).
	setp.gt.ftz.f32 	%p125, %f12, %f14;
	@!%p125 bra 	$Lt_89_303106;
	.loc	22	173	0
	// S recomputed as above
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1502 = S * (%f13 - %f14) / (%f12 - %f14)
	sub.ftz.f32 	%f1503, %f13, %f14;
	mul.ftz.f32 	%f1504, %f1497, %f1503;
	sub.ftz.f32 	%f1505, %f12, %f14;
	div.approx.ftz.f32 	%f1502, %f1504, %f1505;
	.loc	22	174	0
	// max slot: %f1501 = S
	mov.f32 	%f1501, %f1497;
	bra.uni 	$Lt_89_302850;
$Lt_89_303106:
	.loc	22	178	0
	mov.f32 	%f1501, 0f00000000;  	// 0
	mov.f32 	%f1502, 0f00000000;  	// 0
$Lt_89_302850:
$Lt_89_301826:
	// min slot (paired with %f14) is 0
	mov.f32 	%f1506, 0f00000000;  	// 0
	bra.uni 	$Lt_89_305410;
$Lt_89_301570:
	// --- min is not %f14 ---
	// %p126 = (min == %f13), %p127 = (max == %f14)
	setp.eq.ftz.f32 	%p126, %f1405, %f13;
	setp.eq.ftz.f32 	%p127, %f1403, %f14;
	@!%p127 bra 	$Lt_89_303618;
	@!%p126 bra 	$Lt_89_304130;
	// max is %f14, min is %f13, mid is %f12
	setp.lt.ftz.f32 	%p128, %f13, %f14;
	@!%p128 bra 	$Lt_89_304642;
	.loc	22	191	0
	// S recomputed as above
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1501 = S * (%f12 - %f13) / (%f14 - %f13)
	sub.ftz.f32 	%f1507, %f12, %f13;
	mul.ftz.f32 	%f1508, %f1497, %f1507;
	sub.ftz.f32 	%f1509, %f14, %f13;
	div.approx.ftz.f32 	%f1501, %f1508, %f1509;
	.loc	22	192	0
	// max slot: %f1506 = S
	mov.f32 	%f1506, %f1497;
	bra.uni 	$Lt_89_304386;
$Lt_89_304642:
	.loc	22	196	0
	mov.f32 	%f1501, 0f00000000;  	// 0
	mov.f32 	%f1506, 0f00000000;  	// 0
$Lt_89_304386:
	// min slot (paired with %f13) is 0
	mov.f32 	%f1502, 0f00000000;  	// 0
	bra.uni 	$Lt_89_305410;
$Lt_89_304130:
	// max is %f14, min is neither %f14 nor %f13 (so %f12), mid is %f13
	setp.lt.ftz.f32 	%p129, %f12, %f14;
	@!%p129 bra 	$Lt_89_305154;
	.loc	22	204	0
	// S recomputed as above
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1502 = S * (%f13 - %f12) / (%f14 - %f12)
	sub.ftz.f32 	%f1510, %f13, %f12;
	mul.ftz.f32 	%f1511, %f1497, %f1510;
	sub.ftz.f32 	%f1512, %f14, %f12;
	div.approx.ftz.f32 	%f1502, %f1511, %f1512;
	.loc	22	205	0
	// max slot: %f1506 = S
	mov.f32 	%f1506, %f1497;
	bra.uni 	$Lt_89_304898;
$Lt_89_305154:
	.loc	22	209	0
	mov.f32 	%f1506, 0f00000000;  	// 0
	mov.f32 	%f1502, 0f00000000;  	// 0
$Lt_89_304898:
	.loc	22	211	0
	// min slot (paired with %f12) is 0
	mov.f32 	%f1501, 0f00000000;  	// 0
	bra.uni 	$Lt_89_305410;
$Lt_89_303618:
	// --- max is not %f14 and min is not %f14: %f14 is the mid value ---
	@!%p126 bra 	$Lt_89_305666;
	// min is %f13, max is %f12
	setp.gt.ftz.f32 	%p130, %f12, %f13;
	@!%p130 bra 	$Lt_89_306178;
	.loc	22	220	0
	// S recomputed as above
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1506 = S * (%f14 - %f13) / (%f12 - %f13)
	sub.ftz.f32 	%f1513, %f14, %f13;
	mul.ftz.f32 	%f1514, %f1497, %f1513;
	sub.ftz.f32 	%f1515, %f12, %f13;
	div.approx.ftz.f32 	%f1506, %f1514, %f1515;
	.loc	22	221	0
	// max slot: %f1501 = S
	mov.f32 	%f1501, %f1497;
	bra.uni 	$Lt_89_305922;
$Lt_89_306178:
	.loc	22	225	0
	mov.f32 	%f1501, 0f00000000;  	// 0
	mov.f32 	%f1506, 0f00000000;  	// 0
$Lt_89_305922:
	// min slot (paired with %f13) is 0
	mov.f32 	%f1502, 0f00000000;  	// 0
	bra.uni 	$Lt_89_305410;
$Lt_89_305666:
	// min is %f12, max is %f13; %p109 (%f12 < %f13) was set at the case entry
	@!%p109 bra 	$Lt_89_306690;
	.loc	22	233	0
	// S recomputed as above
	setp.gt.ftz.f32 	%p104, %f4, %f2;
	max.ftz.f32 	%f1398, %f4, %f2;
	selp.f32 	%f1399, %f2, %f4, %p104;
	max.ftz.f32 	%f1400, %f1398, %f6;
	setp.lt.ftz.f32 	%p105, %f1399, %f6;
	selp.f32 	%f1401, %f1399, %f6, %p105;
	sub.ftz.f32 	%f1496, %f1400, %f1401;
	cvt.ftz.sat.f32.f32 	%f1497, %f1496;
	// mid slot: %f1506 = S * (%f14 - %f12) / (%f13 - %f12)
	sub.ftz.f32 	%f1516, %f14, %f12;
	mul.ftz.f32 	%f1517, %f1497, %f1516;
	sub.ftz.f32 	%f1518, %f13, %f12;
	div.approx.ftz.f32 	%f1506, %f1517, %f1518;
	.loc	22	234	0
	// max slot: %f1502 = S
	mov.f32 	%f1502, %f1497;
	bra.uni 	$Lt_89_306434;
$Lt_89_306690:
	.loc	22	238	0
	mov.f32 	%f1506, 0f00000000;  	// 0
	mov.f32 	%f1502, 0f00000000;  	// 0
$Lt_89_306434:
	.loc	22	240	0
	// min slot (paired with %f12) is 0
	mov.f32 	%f1501, 0f00000000;  	// 0
$Lt_89_305410:
$Lt_89_303362:
$Lt_89_301314:
	.loc	22	113	0
	// Weighted luma with three constants from kRGB32f_To_601YPbPr:
	//   Y(x1,x2,x3) = [+8]*x1 + [+4]*x2 + [+0]*x3
	// %f1523 = Y(%f12,%f13,%f14), %f1524 = Y(%f1501,%f1502,%f1506).
	// NOTE(review): table name suggests Rec.601 luma weights - confirm.
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1519, %f13, %f501;
	mul.ftz.f32 	%f1520, %f1502, %f501;
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1521, %f500, %f14, %f1519;
	fma.rn.ftz.f32 	%f1522, %f500, %f1506, %f1520;
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1523, %f499, %f12, %f1521;
	fma.rn.ftz.f32 	%f1524, %f499, %f1501, %f1522;
	// d = sat(%f1523) - sat(%f1524); add d to every channel.
	// %f1528/%f1530/%f1532 keep the shifted values, %f1529/%f1531/%f1533
	// are working copies that the clip steps below may overwrite.
	cvt.ftz.sat.f32.f32 	%f1525, %f1523;
	cvt.ftz.sat.f32.f32 	%f1526, %f1524;
	sub.ftz.f32 	%f1527, %f1525, %f1526;
	add.ftz.f32 	%f1528, %f1527, %f1501;
	mov.f32 	%f1529, %f1528;
	add.ftz.f32 	%f1530, %f1527, %f1502;
	mov.f32 	%f1531, %f1530;
	add.ftz.f32 	%f1532, %f1527, %f1506;
	mov.f32 	%f1533, %f1532;
	.loc	22	50	0
	// L = %f1537 = sat(Y(shifted colour))
	mul.ftz.f32 	%f1534, %f1530, %f501;
	fma.rn.ftz.f32 	%f1535, %f500, %f1532, %f1534;
	fma.rn.ftz.f32 	%f1536, %f499, %f1528, %f1535;
	cvt.ftz.sat.f32.f32 	%f1537, %f1536;
	.loc	22	116	0
	// n = %f1539 = min of the shifted channels; clip only if n < 0
	setp.gt.ftz.f32 	%p131, %f1530, %f1528;
	selp.f32 	%f1538, %f1528, %f1530, %p131;
	setp.lt.ftz.f32 	%p132, %f1538, %f1532;
	selp.f32 	%f1539, %f1538, %f1532, %p132;
	mov.f32 	%f1540, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p133, %f1539, %f1540;
	@!%p133 bra 	$Lt_89_306946;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - n)
	sub.ftz.f32 	%f1541, %f1537, %f1539;
	sub.ftz.f32 	%f1542, %f1532, %f1537;
	mul.ftz.f32 	%f1543, %f1537, %f1542;
	div.approx.ftz.f32 	%f1544, %f1543, %f1541;
	add.ftz.f32 	%f1533, %f1537, %f1544;
	.loc	22	120	0
	sub.ftz.f32 	%f1545, %f1530, %f1537;
	mul.ftz.f32 	%f1546, %f1537, %f1545;
	div.approx.ftz.f32 	%f1547, %f1546, %f1541;
	add.ftz.f32 	%f1531, %f1537, %f1547;
	.loc	22	121	0
	sub.ftz.f32 	%f1548, %f1528, %f1537;
	mul.ftz.f32 	%f1549, %f1537, %f1548;
	div.approx.ftz.f32 	%f1550, %f1549, %f1541;
	add.ftz.f32 	%f1529, %f1537, %f1550;
$Lt_89_306946:
	// x = %f1552 = max of the shifted channels (values from before the low
	// clip); clip only if x > 1
	max.ftz.f32 	%f1551, %f1530, %f1528;
	max.ftz.f32 	%f1552, %f1551, %f1532;
	mov.f32 	%f1553, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p134, %f1552, %f1553;
	@!%p134 bra 	$Lt_89_307458;
	.loc	27	529	0
	// High clip, per channel (on the working copies):
	//   c = L + (1 - L) * (c - L) / (x - L)
	mov.f32 	%f1554, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1555, %f1554, %f1537;
	sub.ftz.f32 	%f1556, %f1552, %f1537;
	sub.ftz.f32 	%f1557, %f1533, %f1537;
	mul.ftz.f32 	%f1558, %f1555, %f1557;
	div.approx.ftz.f32 	%f1559, %f1558, %f1556;
	.loc	22	125	0
	add.ftz.f32 	%f1533, %f1559, %f1537;
	.loc	27	529	0
	sub.ftz.f32 	%f1560, %f1531, %f1537;
	mul.ftz.f32 	%f1561, %f1555, %f1560;
	div.approx.ftz.f32 	%f1562, %f1561, %f1556;
	.loc	22	126	0
	add.ftz.f32 	%f1531, %f1562, %f1537;
	.loc	27	529	0
	sub.ftz.f32 	%f1563, %f1529, %f1537;
	mul.ftz.f32 	%f1564, %f1555, %f1563;
	div.approx.ftz.f32 	%f1565, %f1564, %f1556;
	.loc	22	127	0
	add.ftz.f32 	%f1529, %f1565, %f1537;
$Lt_89_307458:
	.loc	22	468	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1566, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1566;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1567, %f175;
	mov.f32 	%f1568, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1569, %f175, %f1568;
	mov.f32 	%f1570, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p135, %f1569, %f1570;
	@!%p135 bra 	$Lt_89_308226;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1571, 0f00000000;  	// 0
	mov.f32 	%f1572, 0f00000000;  	// 0
	mov.f32 	%f1573, 0f00000000;  	// 0
	mov.f32 	%f1567, 0f00000000;  	// 0
	bra.uni 	$Lt_89_307970;
$Lt_89_308226:
	// %f184 = 1 - %f15 ; %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	mov.f32 	%f1574, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1574, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1575, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1576, %f172, %f185;
	sub.ftz.f32 	%f189, %f1575, %f1576;
	// Per channel, with c the clipped colour:
	//   out = %f12 * %f189 + %f186 * (c * %f15 + %f184 * c)
	// NOTE(review): both weights (%f15 and 1-%f15) multiply the same c, so
	// the inner sum is c up to rounding; %f2/%f4/%f6 do not appear in this
	// step - confirm that is intended.
	mul.ftz.f32 	%f1577, %f184, %f1529;
	fma.rn.ftz.f32 	%f1578, %f1529, %f15, %f1577;
	mul.ftz.f32 	%f1579, %f186, %f1578;
	fma.rn.ftz.f32 	%f1573, %f12, %f189, %f1579;
	mul.ftz.f32 	%f1580, %f184, %f1531;
	fma.rn.ftz.f32 	%f1581, %f1531, %f15, %f1580;
	mul.ftz.f32 	%f1582, %f186, %f1581;
	fma.rn.ftz.f32 	%f1572, %f13, %f189, %f1582;
	mul.ftz.f32 	%f1583, %f184, %f1533;
	fma.rn.ftz.f32 	%f1584, %f1533, %f15, %f1583;
	mul.ftz.f32 	%f1585, %f186, %f1584;
	fma.rn.ftz.f32 	%f1571, %f14, %f189, %f1585;
$Lt_89_307970:
	// Commit the results to the running registers and leave the case.
	.loc	6	214	0
	mov.f32 	%f2, %f1573;
	mov.f32 	%f4, %f1572;
	mov.f32 	%f6, %f1571;
	mov.f32 	%f9, %f1567;
	bra.uni 	$Lt_89_311554;
$Lt_89_6658:
	// Case body (three channels handled jointly). Steps:
	//   1. Shift (%f2,%f4,%f6) so its weighted luma equals that of
	//      (%f12,%f13,%f14).
	//   2. Pull the result back into [0,1] around its luma.
	//   3. Composite with %f12..%f15 and jump to $Lt_89_311554.
	// NOTE(review): this has the shape of the W3C/PDF "Color" blend,
	// SetLum(a, Lum(b)) - confirm against the CUDA source.
	.loc	22	113	0
	// Weighted luma with three constants from kRGB32f_To_601YPbPr:
	//   Y(x1,x2,x3) = [+8]*x1 + [+4]*x2 + [+0]*x3
	// %f1590 = Y(%f2,%f4,%f6), %f1591 = Y(%f12,%f13,%f14).
	// NOTE(review): table name suggests Rec.601 luma weights - confirm.
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1586, %f501, %f4;
	mul.ftz.f32 	%f1587, %f13, %f501;
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1588, %f500, %f6, %f1586;
	fma.rn.ftz.f32 	%f1589, %f500, %f14, %f1587;
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1590, %f499, %f2, %f1588;
	fma.rn.ftz.f32 	%f1591, %f499, %f12, %f1589;
	// d = sat(%f1591) - sat(%f1590); add d to %f2, %f4, %f6.
	// %f1595/%f1597/%f1599 keep the shifted values, %f1596/%f1598/%f1600
	// are working copies that the clip steps below may overwrite.
	cvt.ftz.sat.f32.f32 	%f1592, %f1590;
	cvt.ftz.sat.f32.f32 	%f1593, %f1591;
	sub.ftz.f32 	%f1594, %f1593, %f1592;
	add.ftz.f32 	%f1595, %f1594, %f2;
	mov.f32 	%f1596, %f1595;
	add.ftz.f32 	%f1597, %f1594, %f4;
	mov.f32 	%f1598, %f1597;
	add.ftz.f32 	%f1599, %f1594, %f6;
	mov.f32 	%f1600, %f1599;
	.loc	22	50	0
	// L = %f1604 = sat(Y(shifted colour))
	mul.ftz.f32 	%f1601, %f1597, %f501;
	fma.rn.ftz.f32 	%f1602, %f500, %f1599, %f1601;
	fma.rn.ftz.f32 	%f1603, %f499, %f1595, %f1602;
	cvt.ftz.sat.f32.f32 	%f1604, %f1603;
	.loc	22	116	0
	// n = %f1606 = min of the shifted channels; clip only if n < 0
	setp.gt.ftz.f32 	%p136, %f1597, %f1595;
	selp.f32 	%f1605, %f1595, %f1597, %p136;
	setp.lt.ftz.f32 	%p137, %f1605, %f1599;
	selp.f32 	%f1606, %f1605, %f1599, %p137;
	mov.f32 	%f1607, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p138, %f1606, %f1607;
	@!%p138 bra 	$Lt_89_308482;
	.loc	22	119	0
	// Low clip, per channel: c = L + L * (c - L) / (L - n)
	sub.ftz.f32 	%f1608, %f1604, %f1606;
	sub.ftz.f32 	%f1609, %f1599, %f1604;
	mul.ftz.f32 	%f1610, %f1604, %f1609;
	div.approx.ftz.f32 	%f1611, %f1610, %f1608;
	add.ftz.f32 	%f1600, %f1604, %f1611;
	.loc	22	120	0
	sub.ftz.f32 	%f1612, %f1597, %f1604;
	mul.ftz.f32 	%f1613, %f1604, %f1612;
	div.approx.ftz.f32 	%f1614, %f1613, %f1608;
	add.ftz.f32 	%f1598, %f1604, %f1614;
	.loc	22	121	0
	sub.ftz.f32 	%f1615, %f1595, %f1604;
	mul.ftz.f32 	%f1616, %f1604, %f1615;
	div.approx.ftz.f32 	%f1617, %f1616, %f1608;
	add.ftz.f32 	%f1596, %f1604, %f1617;
$Lt_89_308482:
	// x = %f1619 = max of the shifted channels (values from before the low
	// clip); clip only if x > 1
	max.ftz.f32 	%f1618, %f1597, %f1595;
	max.ftz.f32 	%f1619, %f1618, %f1599;
	mov.f32 	%f1620, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p139, %f1619, %f1620;
	@!%p139 bra 	$Lt_89_308994;
	.loc	27	529	0
	// High clip, per channel (on the working copies):
	//   c = L + (1 - L) * (c - L) / (x - L)
	mov.f32 	%f1621, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1622, %f1621, %f1604;
	sub.ftz.f32 	%f1623, %f1619, %f1604;
	sub.ftz.f32 	%f1624, %f1600, %f1604;
	mul.ftz.f32 	%f1625, %f1622, %f1624;
	div.approx.ftz.f32 	%f1626, %f1625, %f1623;
	.loc	22	125	0
	add.ftz.f32 	%f1600, %f1626, %f1604;
	.loc	27	529	0
	sub.ftz.f32 	%f1627, %f1598, %f1604;
	mul.ftz.f32 	%f1628, %f1622, %f1627;
	div.approx.ftz.f32 	%f1629, %f1628, %f1623;
	.loc	22	126	0
	add.ftz.f32 	%f1598, %f1629, %f1604;
	.loc	27	529	0
	sub.ftz.f32 	%f1630, %f1596, %f1604;
	mul.ftz.f32 	%f1631, %f1622, %f1630;
	div.approx.ftz.f32 	%f1632, %f1631, %f1623;
	.loc	22	127	0
	add.ftz.f32 	%f1596, %f1632, %f1604;
$Lt_89_308994:
	.loc	22	468	0
	// t = %f11 * %f9 ;  A = %f175 = t + %f15 - t * %f15
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1633, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1633;
	// A becomes the new %f9 unless (A - 8e-6) <= 0.
	mov.f32 	%f1634, %f175;
	mov.f32 	%f1635, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1636, %f175, %f1635;
	mov.f32 	%f1637, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p140, %f1636, %f1637;
	@!%p140 bra 	$Lt_89_309762;
	// Degenerate A: all four outputs are forced to 0 (also avoids 1/A).
	mov.f32 	%f1638, 0f00000000;  	// 0
	mov.f32 	%f1639, 0f00000000;  	// 0
	mov.f32 	%f1640, 0f00000000;  	// 0
	mov.f32 	%f1634, 0f00000000;  	// 0
	bra.uni 	$Lt_89_309506;
$Lt_89_309762:
	// %f184 = 1 - %f15 ; %f185 ~= 1/A ; %f186 = t/A ; %f189 = 1 - t/A
	mov.f32 	%f1641, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1641, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1642, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1643, %f172, %f185;
	sub.ftz.f32 	%f189, %f1642, %f1643;
	// Per channel, with c the clipped colour:
	//   out = %f12 * %f189 + %f186 * (c * %f15 + %f184 * c)
	// NOTE(review): both weights (%f15 and 1-%f15) multiply the same c, so
	// the inner sum is c up to rounding - confirm that is intended.
	mul.ftz.f32 	%f1644, %f184, %f1596;
	fma.rn.ftz.f32 	%f1645, %f1596, %f15, %f1644;
	mul.ftz.f32 	%f1646, %f186, %f1645;
	fma.rn.ftz.f32 	%f1640, %f12, %f189, %f1646;
	mul.ftz.f32 	%f1647, %f184, %f1598;
	fma.rn.ftz.f32 	%f1648, %f1598, %f15, %f1647;
	mul.ftz.f32 	%f1649, %f186, %f1648;
	fma.rn.ftz.f32 	%f1639, %f13, %f189, %f1649;
	mul.ftz.f32 	%f1650, %f184, %f1600;
	fma.rn.ftz.f32 	%f1651, %f1600, %f15, %f1650;
	mul.ftz.f32 	%f1652, %f186, %f1651;
	fma.rn.ftz.f32 	%f1638, %f14, %f189, %f1652;
$Lt_89_309506:
	// Commit the results to the running registers and leave the case.
	.loc	6	215	0
	mov.f32 	%f2, %f1640;
	mov.f32 	%f4, %f1639;
	mov.f32 	%f6, %f1638;
	mov.f32 	%f9, %f1634;
	bra.uni 	$Lt_89_311554;
$Lt_89_6914:
	.loc	22	113	0
	ld.const.f32 	%f501, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1653, %f501, %f4;
	mul.ftz.f32 	%f1654, %f13, %f501;
	ld.const.f32 	%f500, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1655, %f500, %f6, %f1653;
	fma.rn.ftz.f32 	%f1656, %f500, %f14, %f1654;
	ld.const.f32 	%f499, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1657, %f499, %f2, %f1655;
	fma.rn.ftz.f32 	%f1658, %f499, %f12, %f1656;
	cvt.ftz.sat.f32.f32 	%f1659, %f1657;
	cvt.ftz.sat.f32.f32 	%f1660, %f1658;
	sub.ftz.f32 	%f1661, %f1659, %f1660;
	add.ftz.f32 	%f1662, %f1661, %f12;
	mov.f32 	%f1663, %f1662;
	add.ftz.f32 	%f1664, %f1661, %f13;
	mov.f32 	%f1665, %f1664;
	add.ftz.f32 	%f1666, %f1661, %f14;
	mov.f32 	%f1667, %f1666;
	.loc	22	50	0
	mul.ftz.f32 	%f1668, %f1664, %f501;
	fma.rn.ftz.f32 	%f1669, %f500, %f1666, %f1668;
	fma.rn.ftz.f32 	%f1670, %f499, %f1662, %f1669;
	cvt.ftz.sat.f32.f32 	%f1671, %f1670;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p141, %f1664, %f1662;
	selp.f32 	%f1672, %f1662, %f1664, %p141;
	setp.lt.ftz.f32 	%p142, %f1672, %f1666;
	selp.f32 	%f1673, %f1672, %f1666, %p142;
	mov.f32 	%f1674, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p143, %f1673, %f1674;
	@!%p143 bra 	$Lt_89_310018;
	.loc	22	119	0
	sub.ftz.f32 	%f1675, %f1671, %f1673;
	sub.ftz.f32 	%f1676, %f1666, %f1671;
	mul.ftz.f32 	%f1677, %f1671, %f1676;
	div.approx.ftz.f32 	%f1678, %f1677, %f1675;
	add.ftz.f32 	%f1667, %f1671, %f1678;
	.loc	22	120	0
	sub.ftz.f32 	%f1679, %f1664, %f1671;
	mul.ftz.f32 	%f1680, %f1671, %f1679;
	div.approx.ftz.f32 	%f1681, %f1680, %f1675;
	add.ftz.f32 	%f1665, %f1671, %f1681;
	.loc	22	121	0
	sub.ftz.f32 	%f1682, %f1662, %f1671;
	mul.ftz.f32 	%f1683, %f1671, %f1682;
	div.approx.ftz.f32 	%f1684, %f1683, %f1675;
	add.ftz.f32 	%f1663, %f1671, %f1684;
$Lt_89_310018:
	max.ftz.f32 	%f1685, %f1664, %f1662;
	max.ftz.f32 	%f1686, %f1685, %f1666;
	mov.f32 	%f1687, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p144, %f1686, %f1687;
	@!%p144 bra 	$Lt_89_310530;
	.loc	27	529	0
	mov.f32 	%f1688, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1689, %f1688, %f1671;
	sub.ftz.f32 	%f1690, %f1686, %f1671;
	sub.ftz.f32 	%f1691, %f1667, %f1671;
	mul.ftz.f32 	%f1692, %f1689, %f1691;
	div.approx.ftz.f32 	%f1693, %f1692, %f1690;
	.loc	22	125	0
	add.ftz.f32 	%f1667, %f1693, %f1671;
	.loc	27	529	0
	sub.ftz.f32 	%f1694, %f1665, %f1671;
	mul.ftz.f32 	%f1695, %f1689, %f1694;
	div.approx.ftz.f32 	%f1696, %f1695, %f1690;
	.loc	22	126	0
	add.ftz.f32 	%f1665, %f1696, %f1671;
	.loc	27	529	0
	sub.ftz.f32 	%f1697, %f1663, %f1671;
	mul.ftz.f32 	%f1698, %f1689, %f1697;
	div.approx.ftz.f32 	%f1699, %f1698, %f1690;
	.loc	22	127	0
	add.ftz.f32 	%f1663, %f1699, %f1671;
$Lt_89_310530:
	.loc	22	468	0
	mul.ftz.f32 	%f172, %f11, %f9;
	add.ftz.f32 	%f173, %f172, %f15;
	mul.ftz.f32 	%f1700, %f172, %f15;
	sub.ftz.f32 	%f175, %f173, %f1700;
	mov.f32 	%f1701, %f175;
	mov.f32 	%f1702, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1703, %f175, %f1702;
	mov.f32 	%f1704, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p145, %f1703, %f1704;
	@!%p145 bra 	$Lt_89_311298;
	mov.f32 	%f1705, 0f00000000;  	// 0
	mov.f32 	%f1706, 0f00000000;  	// 0
	mov.f32 	%f1707, 0f00000000;  	// 0
	mov.f32 	%f1701, 0f00000000;  	// 0
	bra.uni 	$Lt_89_311042;
$Lt_89_311298:
	mov.f32 	%f1708, 0f3f800000;  	// 1
	sub.ftz.f32 	%f184, %f1708, %f15;
	rcp.approx.ftz.f32 	%f185, %f175;
	mul.ftz.f32 	%f186, %f185, %f172;
	mov.f32 	%f1709, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1710, %f172, %f185;
	sub.ftz.f32 	%f189, %f1709, %f1710;
	mul.ftz.f32 	%f1711, %f184, %f1663;
	fma.rn.ftz.f32 	%f1712, %f1663, %f15, %f1711;
	mul.ftz.f32 	%f1713, %f186, %f1712;
	fma.rn.ftz.f32 	%f1707, %f12, %f189, %f1713;
	mul.ftz.f32 	%f1714, %f184, %f1665;
	fma.rn.ftz.f32 	%f1715, %f1665, %f15, %f1714;
	mul.ftz.f32 	%f1716, %f186, %f1715;
	fma.rn.ftz.f32 	%f1706, %f13, %f189, %f1716;
	mul.ftz.f32 	%f1717, %f184, %f1667;
	fma.rn.ftz.f32 	%f1718, %f1667, %f15, %f1717;
	mul.ftz.f32 	%f1719, %f186, %f1718;
	fma.rn.ftz.f32 	%f1705, %f14, %f189, %f1719;
$Lt_89_311042:
	.loc	6	216	0
	mov.f32 	%f2, %f1707;
	mov.f32 	%f4, %f1706;
	mov.f32 	%f6, %f1705;
	mov.f32 	%f9, %f1701;
	bra.uni 	$Lt_89_311554;
$Lt_89_263682:
	.loc	6	218	0
	@!%p1 bra 	$Lt_89_311554;
	mov.u32 	%r114, 0;
	setp.ne.s32 	%p146, %r16, %r114;
	@%p146 bra 	$Lt_89_312066;
	.loc	6	226	0
	cvt.ftz.sat.f32.f32 	%f9, %f8;
$Lt_89_312066:
	mul.ftz.f32 	%f1720, %f11, %f9;
	mov.u32 	%r115, 0;
	setp.eq.s32 	%p147, %r16, %r115;
	@%p147 bra 	$Lt_89_313090;
	mov.f32 	%f1721, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1722, %f1720, %f1721;
	mov.f32 	%f1723, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p148, %f1722, %f1723;
	@!%p148 bra 	$Lt_89_313346;
	mov.f32 	%f6, 0f00000000;     	// 0
	mov.f32 	%f4, 0f00000000;     	// 0
	mov.f32 	%f2, 0f00000000;     	// 0
	bra.uni 	$Lt_89_313090;
$Lt_89_313346:
	.loc	6	238	0
	rcp.approx.ftz.f32 	%f1724, %f9;
	mul.ftz.f32 	%f1725, %f1724, %f2;
	mov.f32 	%f1726, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p149, %f1725, %f1726;
	@!%p149 bra 	$Lt_89_313602;
	.loc	5	242	0
	neg.ftz.f32 	%f1727, %f1725;
	lg2.approx.ftz.f32 	%f1728, %f1727;
	mov.f32 	%f1729, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1730, %f1728, %f1729;
	ex2.approx.ftz.f32 	%f1731, %f1730;
	neg.ftz.f32 	%f1732, %f1731;
	bra.uni 	$LDWendi___log2f_266_34;
$Lt_89_313602:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1733, %f1725;
	mov.f32 	%f1734, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1735, %f1733, %f1734;
	ex2.approx.ftz.f32 	%f1732, %f1735;
$LDWendi___log2f_266_34:
	.loc	6	238	0
	mov.f32 	%f2, %f1732;
	.loc	6	239	0
	mul.ftz.f32 	%f1736, %f1724, %f4;
	mov.f32 	%f1737, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p150, %f1736, %f1737;
	@!%p150 bra 	$Lt_89_314114;
	.loc	5	242	0
	neg.ftz.f32 	%f1738, %f1736;
	lg2.approx.ftz.f32 	%f1739, %f1738;
	mov.f32 	%f1740, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1741, %f1739, %f1740;
	ex2.approx.ftz.f32 	%f1742, %f1741;
	neg.ftz.f32 	%f1743, %f1742;
	bra.uni 	$LDWendi___log2f_266_32;
$Lt_89_314114:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1744, %f1736;
	mov.f32 	%f1745, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1746, %f1744, %f1745;
	ex2.approx.ftz.f32 	%f1743, %f1746;
$LDWendi___log2f_266_32:
	.loc	6	239	0
	mov.f32 	%f4, %f1743;
	.loc	6	240	0
	mul.ftz.f32 	%f1747, %f1724, %f6;
	mov.f32 	%f1748, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p151, %f1747, %f1748;
	@!%p151 bra 	$Lt_89_314626;
	.loc	5	242	0
	neg.ftz.f32 	%f1749, %f1747;
	lg2.approx.ftz.f32 	%f1750, %f1749;
	mov.f32 	%f1751, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1752, %f1750, %f1751;
	ex2.approx.ftz.f32 	%f1753, %f1752;
	neg.ftz.f32 	%f1754, %f1753;
	bra.uni 	$LDWendi___log2f_266_30;
$Lt_89_314626:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1755, %f1747;
	mov.f32 	%f1756, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1757, %f1755, %f1756;
	ex2.approx.ftz.f32 	%f1754, %f1757;
$LDWendi___log2f_266_30:
	.loc	6	240	0
	mov.f32 	%f6, %f1754;
$Lt_89_313090:
$Lt_89_312578:
	.loc	6	243	0
	mov.f32 	%f9, %f1720;
$Lt_89_311554:
$Lt_89_263426:
	@!%p2 bra 	$Lt_89_315394;
	.loc	21	126	0
	mul.lo.u64 	%rd8, %rd3, 8;
	add.u64 	%rd9, %rd2, %rd8;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f2;
	mov.b32		%r116, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f4;
	mov.b32		%r117, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r118, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f9;
	mov.b32		%r119, %b1; }
	st.v4.u16 	[%rd9+0], {%r116,%r117,%r118,%r119};
	.loc	6	246	0
	bra.uni 	$Lt_89_315138;
$Lt_89_315394:
	.loc	21	126	0
	mul.lo.u64 	%rd10, %rd3, 16;
	add.u64 	%rd11, %rd2, %rd10;
	st.v4.f32 	[%rd11+0], {%f2,%f4,%f6,%f9};
$Lt_89_315138:
	.loc	6	253	0
	ret;
$LDWend__Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb:
	} // _Z20WriteCompositedPixel8PixelRGBP6float4i17DevicePixelFormatiiiif12IR_BlendModebb
	// Module-scope texture reference. Within the visible part of this file it is
	// the texture operand of every tex.2d fetch in _Z13sampleTextureff and
	// _Z12bicubicTex2Dff.
	// NOTE(review): the name suggests point (nearest) filtering, but the filter
	// and addressing modes are configured by the host and are not visible here.
	.global .texref sPointTexture;

	// _Z13sampleTextureff  (demangles to sampleTexture(float, float))
	//
	// Performs a single 2D fetch from sPointTexture and returns the four f32
	// components unchanged.
	//   param 1 (f32): first texture coordinate
	//   param 2 (f32): second texture coordinate
	//   return (16 bytes, 16-byte aligned): the four fetched f32 components,
	//     written at byte offsets 0, 4, 8 and 12
	//     (presumably a float4 in the original CUDA source -- TODO confirm).
	.visible .func (.param .align 16 .b8 __cudaretf__Z13sampleTextureff[16]) _Z13sampleTextureff (.param .f32 __cudaparmf1__Z13sampleTextureff, .param .f32 __cudaparmf2__Z13sampleTextureff)
	{
	.reg .f32 %f<24>;
	.loc	6	396	0
$LDWbegin__Z13sampleTextureff:
	// Load the two coordinates; the extra mov chains are compiler-generated copies.
	ld.param.f32 	%f1, [__cudaparmf1__Z13sampleTextureff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z13sampleTextureff];
	mov.f32 	%f4, %f3;
	mov.f32 	%f5, %f2;
	mov.f32 	%f6, %f4;
	// The coordinate operand is a 4-element vector; elements 3 and 4 are set to 0
	// (presumably padding, since this is a 2D fetch).
	mov.f32 	%f7, 0f00000000;     	// 0
	mov.f32 	%f8, %f7;
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, %f9;
	tex.2d.v4.f32.f32 {%f11,%f12,%f13,%f14},[sPointTexture,{%f5,%f6,%f8,%f10}];
	.loc	6	397	0
	// Copy the fetched components into the return parameter, one f32 at a time.
	mov.f32 	%f15, %f11;
	mov.f32 	%f16, %f12;
	mov.f32 	%f17, %f13;
	mov.f32 	%f18, %f14;
	mov.f32 	%f19, %f15;
	st.param.f32 	[__cudaretf__Z13sampleTextureff+0], %f19;
	mov.f32 	%f20, %f16;
	st.param.f32 	[__cudaretf__Z13sampleTextureff+4], %f20;
	mov.f32 	%f21, %f17;
	st.param.f32 	[__cudaretf__Z13sampleTextureff+8], %f21;
	mov.f32 	%f22, %f18;
	st.param.f32 	[__cudaretf__Z13sampleTextureff+12], %f22;
	ret;
$LDWend__Z13sampleTextureff:
	} // _Z13sampleTextureff

	// _Z5GetW0f  (demangles to GetW0(float))
	//
	// Evaluates the cubic polynomial
	//     W0(t) = t * (t * (1.2 - 0.6*t) - 0.6)  =  -0.6*t^3 + 1.2*t^2 - 0.6*t
	// in Horner form using two FMAs and one multiply.
	//   param 1 (f32): t
	//   return  (f32): W0(t)
	// NOTE(review): this matches the outer-tap weight of a cubic-convolution
	// kernel with a = -0.6; confirm against the CUDA source.
	.visible .func (.param .f32 __cudaretf__Z5GetW0f) _Z5GetW0f (.param .f32 __cudaparmf1__Z5GetW0f)
	{
	.reg .f32 %f<10>;
	.loc	6	401	0
$LDWbegin__Z5GetW0f:
	ld.param.f32 	%f1, [__cudaparmf1__Z5GetW0f];
	mov.f32 	%f2, %f1;
	.loc	6	403	0
	mov.f32 	%f3, 0fbf19999a;     	// -0.6
	mov.f32 	%f4, 0f3f99999a;     	// 1.2
	mov.f32 	%f5, 0fbf19999a;     	// -0.6
	// %f6 = -0.6*t + 1.2
	fma.rn.ftz.f32 	%f6, %f5, %f2, %f4;
	// %f7 = t*%f6 - 0.6
	fma.rn.ftz.f32 	%f7, %f2, %f6, %f3;
	// %f8 = t*%f7  (no constant term, so W0(0) == 0)
	mul.ftz.f32 	%f8, %f2, %f7;
	st.param.f32 	[__cudaretf__Z5GetW0f], %f8;
	ret;
$LDWend__Z5GetW0f:
	} // _Z5GetW0f

	// _Z5GetW1f  (demangles to GetW1(float))
	//
	// Evaluates the cubic polynomial
	//     W1(t) = t * (t * (1.4*t - 2.4)) + 1  =  1.4*t^3 - 2.4*t^2 + 1
	// using one FMA, one multiply and a final FMA.
	//   param 1 (f32): t
	//   return  (f32): W1(t)
	// NOTE(review): this matches the inner-tap weight of a cubic-convolution
	// kernel with a = -0.6; confirm against the CUDA source.
	.visible .func (.param .f32 __cudaretf__Z5GetW1f) _Z5GetW1f (.param .f32 __cudaparmf1__Z5GetW1f)
	{
	.reg .f32 %f<10>;
	.loc	6	406	0
$LDWbegin__Z5GetW1f:
	ld.param.f32 	%f1, [__cudaparmf1__Z5GetW1f];
	mov.f32 	%f2, %f1;
	.loc	6	408	0
	mov.f32 	%f3, 0f3f800000;     	// 1
	mov.f32 	%f4, 0fc019999a;     	// -2.4
	mov.f32 	%f5, 0f3fb33333;     	// 1.4
	// %f6 = 1.4*t - 2.4
	fma.rn.ftz.f32 	%f6, %f5, %f2, %f4;
	// %f7 = t*%f6  (there is no linear term)
	mul.ftz.f32 	%f7, %f2, %f6;
	// %f8 = t*%f7 + 1  (so W1(0) == 1)
	fma.rn.ftz.f32 	%f8, %f2, %f7, %f3;
	st.param.f32 	[__cudaretf__Z5GetW1f], %f8;
	ret;
$LDWend__Z5GetW1f:
	} // _Z5GetW1f

	// _Z12bicubicTex2Dff  (demangles to bicubicTex2D(float, float))
	//
	// Computes a separable 4x4 weighted sum of sPointTexture fetches around the
	// requested coordinate (x, y) and returns the four accumulated components.
	//   param 1 (f32): x
	//   param 2 (f32): y
	//   return (16 bytes, 16-byte aligned): four f32 sums at byte offsets 0/4/8/12.
	//
	// Notation used in the comments below:
	//   fx = (x-0.5) - floor(x-0.5)           (%f29)
	//   fy = (y-0.5) - floor(y-0.5)           (%f30)
	//   bx = floor(x-0.5) - 0.5               (%f12)  first sampled column
	//   by = floor(y-0.5) - 0.5               (%f14)  first sampled row
	//   W0(t) = t*(t*(1.2 - 0.6t) - 0.6)      (same polynomial as _Z5GetW0f)
	//   W1(t) = t*(t*(1.4t - 2.4)) + 1        (same polynomial as _Z5GetW1f)
	//   column weights: wx0=W0(fx) %f41, wx1=W1(fx) %f65,
	//                   wx2=W1(1-fx) %f90, wx3=W0(1-fx) %f113
	//   row weights:    wy0=W0(fy) %f42, wy1=W1(fy) %f136,
	//                   wy2=W1(1-fy) %f206, wy3=W0(1-fy) %f274
	// Each tap (i,j) fetches at (bx+i, by+j) and adds texel * wx_i * wy_j to the
	// running sum. %f25..%f28 are reused as the "current texel" for every tap.
	// The helper calls appear to have been inlined by the compiler: no call
	// instruction is present, and the weight math is repeated in place.
	.visible .func (.param .align 16 .b8 __cudaretf__Z12bicubicTex2Dff[16]) _Z12bicubicTex2Dff (.param .f32 __cudaparmf1__Z12bicubicTex2Dff, .param .f32 __cudaparmf2__Z12bicubicTex2Dff)
	{
	.reg .f32 %f<326>;
	.loc	6	413	0
$LDWbegin__Z12bicubicTex2Dff:
	ld.param.f32 	%f1, [__cudaparmf1__Z12bicubicTex2Dff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z12bicubicTex2Dff];
	mov.f32 	%f4, %f3;
	.loc	6	442	0
	// Shift by -0.5, take the floor (cvt.rmi rounds toward -infinity), then shift
	// by -0.5 again to obtain the first tap position (bx, by).
	mov.f32 	%f5, 0fbf000000;     	// -0.5
	add.ftz.f32 	%f6, %f2, %f5;
	mov.f32 	%f7, 0fbf000000;     	// -0.5
	add.ftz.f32 	%f8, %f4, %f7;
	cvt.rmi.ftz.f32.f32 	%f9, %f6;
	cvt.rmi.ftz.f32.f32 	%f10, %f8;
	mov.f32 	%f11, 0fbf000000;    	// -0.5
	add.ftz.f32 	%f12, %f9, %f11;
	mov.f32 	%f13, 0fbf000000;    	// -0.5
	add.ftz.f32 	%f14, %f10, %f13;
	// ---- tap (0,0): fetch at (bx, by) ----
	mov.f32 	%f15, %f12;
	mov.f32 	%f16, %f14;
	mov.f32 	%f17, 0f00000000;    	// 0
	mov.f32 	%f18, %f17;
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, %f19;
	tex.2d.v4.f32.f32 {%f21,%f22,%f23,%f24},[sPointTexture,{%f15,%f16,%f18,%f20}];
	.loc	6	397	0
	mov.f32 	%f25, %f21;
	mov.f32 	%f26, %f22;
	mov.f32 	%f27, %f23;
	mov.f32 	%f28, %f24;
	.loc	6	62	0
	// Fractional offsets fx (%f29) and fy (%f30).
	sub.ftz.f32 	%f29, %f6, %f9;
	sub.ftz.f32 	%f30, %f8, %f10;
	// wx0 = W0(fx) -> %f41 and wy0 = W0(fy) -> %f42, evaluated interleaved.
	mov.f32 	%f31, 0f3f99999a;    	// 1.2
	mov.f32 	%f32, 0fbf19999a;    	// -0.6
	fma.rn.ftz.f32 	%f33, %f32, %f29, %f31;
	mov.f32 	%f34, 0f3f99999a;    	// 1.2
	mov.f32 	%f35, 0fbf19999a;    	// -0.6
	fma.rn.ftz.f32 	%f36, %f35, %f30, %f34;
	mov.f32 	%f37, 0fbf19999a;    	// -0.6
	fma.rn.ftz.f32 	%f38, %f29, %f33, %f37;
	mov.f32 	%f39, 0fbf19999a;    	// -0.6
	fma.rn.ftz.f32 	%f40, %f30, %f36, %f39;
	mul.ftz.f32 	%f41, %f29, %f38;
	mul.ftz.f32 	%f42, %f30, %f40;
	// The first tap initialises the four sums: sum = texel * (wx0*wy0).
	mul.ftz.f32 	%f43, %f41, %f42;
	mul.ftz.f32 	%f44, %f43, %f25;
	.loc	6	63	0
	mul.ftz.f32 	%f45, %f43, %f26;
	.loc	6	64	0
	mul.ftz.f32 	%f46, %f43, %f27;
	.loc	6	65	0
	mul.ftz.f32 	%f47, %f43, %f28;
	.loc	6	443	0
	// ---- tap (1,0): fetch at (bx+1, by); %f49 = bx+1 is reused by later rows ----
	mov.f32 	%f48, 0f3f800000;    	// 1
	add.ftz.f32 	%f49, %f12, %f48;
	mov.f32 	%f50, %f49;
	mov.f32 	%f51, %f14;
	mov.f32 	%f52, 0f00000000;    	// 0
	mov.f32 	%f53, %f52;
	mov.f32 	%f54, 0f00000000;    	// 0
	mov.f32 	%f55, %f54;
	tex.2d.v4.f32.f32 {%f56,%f57,%f58,%f59},[sPointTexture,{%f50,%f51,%f53,%f55}];
	.loc	6	397	0
	mov.f32 	%f25, %f56;
	mov.f32 	%f26, %f57;
	mov.f32 	%f27, %f58;
	mov.f32 	%f28, %f59;
	.loc	6	53	0
	// wx1 = W1(fx) -> %f65; tap weight %f66 = wx1*wy0; sum += texel * weight.
	mov.f32 	%f60, 0fc019999a;    	// -2.4
	mov.f32 	%f61, 0f3fb33333;    	// 1.4
	fma.rn.ftz.f32 	%f62, %f61, %f29, %f60;
	mul.ftz.f32 	%f63, %f29, %f62;
	mov.f32 	%f64, 0f3f800000;    	// 1
	fma.rn.ftz.f32 	%f65, %f29, %f63, %f64;
	mul.ftz.f32 	%f66, %f65, %f42;
	fma.rn.ftz.f32 	%f67, %f25, %f66, %f44;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f68, %f26, %f66, %f45;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f69, %f27, %f66, %f46;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f70, %f28, %f66, %f47;
	.loc	6	444	0
	// ---- tap (2,0): fetch at (bx+2, by); %f72 = bx+2 ----
	mov.f32 	%f71, 0f40000000;    	// 2
	add.ftz.f32 	%f72, %f12, %f71;
	mov.f32 	%f73, %f72;
	mov.f32 	%f74, %f14;
	mov.f32 	%f75, 0f00000000;    	// 0
	mov.f32 	%f76, %f75;
	mov.f32 	%f77, 0f00000000;    	// 0
	mov.f32 	%f78, %f77;
	tex.2d.v4.f32.f32 {%f79,%f80,%f81,%f82},[sPointTexture,{%f73,%f74,%f76,%f78}];
	.loc	6	397	0
	mov.f32 	%f25, %f79;
	mov.f32 	%f26, %f80;
	mov.f32 	%f27, %f81;
	mov.f32 	%f28, %f82;
	.loc	6	53	0
	// %f84 = 1 - fx; wx2 = W1(1-fx) -> %f90; tap weight %f91 = wy0*wx2.
	mov.f32 	%f83, 0f3f800000;    	// 1
	sub.ftz.f32 	%f84, %f83, %f29;
	mov.f32 	%f85, 0fc019999a;    	// -2.4
	mov.f32 	%f86, 0f3fb33333;    	// 1.4
	fma.rn.ftz.f32 	%f87, %f86, %f84, %f85;
	mul.ftz.f32 	%f88, %f84, %f87;
	mov.f32 	%f89, 0f3f800000;    	// 1
	fma.rn.ftz.f32 	%f90, %f84, %f88, %f89;
	mul.ftz.f32 	%f91, %f42, %f90;
	fma.rn.ftz.f32 	%f92, %f25, %f91, %f67;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f93, %f26, %f91, %f68;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f94, %f27, %f91, %f69;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f95, %f28, %f91, %f70;
	.loc	6	445	0
	// ---- tap (3,0): fetch at (bx+3, by); %f97 = bx+3 ----
	mov.f32 	%f96, 0f40400000;    	// 3
	add.ftz.f32 	%f97, %f12, %f96;
	mov.f32 	%f98, %f97;
	mov.f32 	%f99, %f14;
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f101, %f100;
	mov.f32 	%f102, 0f00000000;   	// 0
	mov.f32 	%f103, %f102;
	tex.2d.v4.f32.f32 {%f104,%f105,%f106,%f107},[sPointTexture,{%f98,%f99,%f101,%f103}];
	.loc	6	397	0
	mov.f32 	%f25, %f104;
	mov.f32 	%f26, %f105;
	mov.f32 	%f27, %f106;
	mov.f32 	%f28, %f107;
	.loc	6	53	0
	// wx3 = W0(1-fx) -> %f113; tap weight %f114 = wy0*wx3.
	mov.f32 	%f108, 0f3f99999a;   	// 1.2
	mov.f32 	%f109, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f110, %f109, %f84, %f108;
	mov.f32 	%f111, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f112, %f84, %f110, %f111;
	mul.ftz.f32 	%f113, %f84, %f112;
	mul.ftz.f32 	%f114, %f42, %f113;
	fma.rn.ftz.f32 	%f115, %f25, %f114, %f92;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f116, %f26, %f114, %f93;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f117, %f27, %f114, %f94;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f118, %f28, %f114, %f95;
	.loc	6	446	0
	// ---- tap (0,1): fetch at (bx, by+1); %f120 = by+1 is reused for this row ----
	mov.f32 	%f119, 0f3f800000;   	// 1
	add.ftz.f32 	%f120, %f14, %f119;
	mov.f32 	%f121, %f12;
	mov.f32 	%f122, %f120;
	mov.f32 	%f123, 0f00000000;   	// 0
	mov.f32 	%f124, %f123;
	mov.f32 	%f125, 0f00000000;   	// 0
	mov.f32 	%f126, %f125;
	tex.2d.v4.f32.f32 {%f127,%f128,%f129,%f130},[sPointTexture,{%f121,%f122,%f124,%f126}];
	.loc	6	397	0
	mov.f32 	%f25, %f127;
	mov.f32 	%f26, %f128;
	mov.f32 	%f27, %f129;
	mov.f32 	%f28, %f130;
	.loc	6	53	0
	// wy1 = W1(fy) -> %f136; tap weight %f137 = wx0*wy1.
	mov.f32 	%f131, 0fc019999a;   	// -2.4
	mov.f32 	%f132, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f133, %f132, %f30, %f131;
	mul.ftz.f32 	%f134, %f30, %f133;
	mov.f32 	%f135, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f136, %f30, %f134, %f135;
	mul.ftz.f32 	%f137, %f41, %f136;
	fma.rn.ftz.f32 	%f138, %f25, %f137, %f115;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f139, %f26, %f137, %f116;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f140, %f27, %f137, %f117;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f141, %f28, %f137, %f118;
	.loc	6	447	0
	// ---- tap (1,1): fetch at (bx+1, by+1) ----
	mov.f32 	%f142, %f49;
	mov.f32 	%f143, %f120;
	mov.f32 	%f144, 0f00000000;   	// 0
	mov.f32 	%f145, %f144;
	mov.f32 	%f146, 0f00000000;   	// 0
	mov.f32 	%f147, %f146;
	tex.2d.v4.f32.f32 {%f148,%f149,%f150,%f151},[sPointTexture,{%f142,%f143,%f145,%f147}];
	.loc	6	397	0
	mov.f32 	%f25, %f148;
	mov.f32 	%f26, %f149;
	mov.f32 	%f27, %f150;
	mov.f32 	%f28, %f151;
	.loc	6	53	0
	// tap weight %f152 = wx1*wy1
	mul.ftz.f32 	%f152, %f65, %f136;
	fma.rn.ftz.f32 	%f153, %f25, %f152, %f138;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f154, %f26, %f152, %f139;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f155, %f27, %f152, %f140;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f156, %f28, %f152, %f141;
	.loc	6	448	0
	// ---- tap (2,1): fetch at (bx+2, by+1) ----
	mov.f32 	%f157, %f72;
	mov.f32 	%f158, %f120;
	mov.f32 	%f159, 0f00000000;   	// 0
	mov.f32 	%f160, %f159;
	mov.f32 	%f161, 0f00000000;   	// 0
	mov.f32 	%f162, %f161;
	tex.2d.v4.f32.f32 {%f163,%f164,%f165,%f166},[sPointTexture,{%f157,%f158,%f160,%f162}];
	.loc	6	397	0
	mov.f32 	%f25, %f163;
	mov.f32 	%f26, %f164;
	mov.f32 	%f27, %f165;
	mov.f32 	%f28, %f166;
	.loc	6	53	0
	// tap weight %f167 = wy1*wx2
	mul.ftz.f32 	%f167, %f136, %f90;
	fma.rn.ftz.f32 	%f168, %f25, %f167, %f153;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f169, %f26, %f167, %f154;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f170, %f27, %f167, %f155;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f171, %f28, %f167, %f156;
	.loc	6	449	0
	// ---- tap (3,1): fetch at (bx+3, by+1) ----
	mov.f32 	%f172, %f97;
	mov.f32 	%f173, %f120;
	mov.f32 	%f174, 0f00000000;   	// 0
	mov.f32 	%f175, %f174;
	mov.f32 	%f176, 0f00000000;   	// 0
	mov.f32 	%f177, %f176;
	tex.2d.v4.f32.f32 {%f178,%f179,%f180,%f181},[sPointTexture,{%f172,%f173,%f175,%f177}];
	.loc	6	397	0
	mov.f32 	%f25, %f178;
	mov.f32 	%f26, %f179;
	mov.f32 	%f27, %f180;
	mov.f32 	%f28, %f181;
	.loc	6	53	0
	// tap weight %f182 = wy1*wx3
	mul.ftz.f32 	%f182, %f136, %f113;
	fma.rn.ftz.f32 	%f183, %f25, %f182, %f168;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f184, %f26, %f182, %f169;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f185, %f27, %f182, %f170;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f186, %f28, %f182, %f171;
	.loc	6	450	0
	// ---- tap (0,2): fetch at (bx, by+2); %f188 = by+2 is reused for this row ----
	mov.f32 	%f187, 0f40000000;   	// 2
	add.ftz.f32 	%f188, %f14, %f187;
	mov.f32 	%f189, %f12;
	mov.f32 	%f190, %f188;
	mov.f32 	%f191, 0f00000000;   	// 0
	mov.f32 	%f192, %f191;
	mov.f32 	%f193, 0f00000000;   	// 0
	mov.f32 	%f194, %f193;
	tex.2d.v4.f32.f32 {%f195,%f196,%f197,%f198},[sPointTexture,{%f189,%f190,%f192,%f194}];
	.loc	6	397	0
	mov.f32 	%f25, %f195;
	mov.f32 	%f26, %f196;
	mov.f32 	%f27, %f197;
	mov.f32 	%f28, %f198;
	.loc	6	53	0
	// %f200 = 1 - fy; wy2 = W1(1-fy) -> %f206; tap weight %f207 = wx0*wy2.
	mov.f32 	%f199, 0f3f800000;   	// 1
	sub.ftz.f32 	%f200, %f199, %f30;
	mov.f32 	%f201, 0fc019999a;   	// -2.4
	mov.f32 	%f202, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f203, %f202, %f200, %f201;
	mul.ftz.f32 	%f204, %f200, %f203;
	mov.f32 	%f205, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f206, %f200, %f204, %f205;
	mul.ftz.f32 	%f207, %f41, %f206;
	fma.rn.ftz.f32 	%f208, %f25, %f207, %f183;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f209, %f26, %f207, %f184;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f210, %f27, %f207, %f185;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f211, %f28, %f207, %f186;
	.loc	6	451	0
	// ---- tap (1,2): fetch at (bx+1, by+2) ----
	mov.f32 	%f212, %f49;
	mov.f32 	%f213, %f188;
	mov.f32 	%f214, 0f00000000;   	// 0
	mov.f32 	%f215, %f214;
	mov.f32 	%f216, 0f00000000;   	// 0
	mov.f32 	%f217, %f216;
	tex.2d.v4.f32.f32 {%f218,%f219,%f220,%f221},[sPointTexture,{%f212,%f213,%f215,%f217}];
	.loc	6	397	0
	mov.f32 	%f25, %f218;
	mov.f32 	%f26, %f219;
	mov.f32 	%f27, %f220;
	mov.f32 	%f28, %f221;
	.loc	6	53	0
	// tap weight %f222 = wx1*wy2
	mul.ftz.f32 	%f222, %f65, %f206;
	fma.rn.ftz.f32 	%f223, %f25, %f222, %f208;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f224, %f26, %f222, %f209;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f225, %f27, %f222, %f210;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f226, %f28, %f222, %f211;
	.loc	6	452	0
	// ---- tap (2,2): fetch at (bx+2, by+2) ----
	mov.f32 	%f227, %f72;
	mov.f32 	%f228, %f188;
	mov.f32 	%f229, 0f00000000;   	// 0
	mov.f32 	%f230, %f229;
	mov.f32 	%f231, 0f00000000;   	// 0
	mov.f32 	%f232, %f231;
	tex.2d.v4.f32.f32 {%f233,%f234,%f235,%f236},[sPointTexture,{%f227,%f228,%f230,%f232}];
	.loc	6	397	0
	mov.f32 	%f25, %f233;
	mov.f32 	%f26, %f234;
	mov.f32 	%f27, %f235;
	mov.f32 	%f28, %f236;
	.loc	6	53	0
	// tap weight %f237 = wx2*wy2
	mul.ftz.f32 	%f237, %f90, %f206;
	fma.rn.ftz.f32 	%f238, %f25, %f237, %f223;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f239, %f26, %f237, %f224;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f240, %f27, %f237, %f225;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f241, %f28, %f237, %f226;
	.loc	6	453	0
	// ---- tap (3,2): fetch at (bx+3, by+2) ----
	mov.f32 	%f242, %f97;
	mov.f32 	%f243, %f188;
	mov.f32 	%f244, 0f00000000;   	// 0
	mov.f32 	%f245, %f244;
	mov.f32 	%f246, 0f00000000;   	// 0
	mov.f32 	%f247, %f246;
	tex.2d.v4.f32.f32 {%f248,%f249,%f250,%f251},[sPointTexture,{%f242,%f243,%f245,%f247}];
	.loc	6	397	0
	mov.f32 	%f25, %f248;
	mov.f32 	%f26, %f249;
	mov.f32 	%f27, %f250;
	mov.f32 	%f28, %f251;
	.loc	6	53	0
	// tap weight %f252 = wx3*wy2
	mul.ftz.f32 	%f252, %f113, %f206;
	fma.rn.ftz.f32 	%f253, %f25, %f252, %f238;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f254, %f26, %f252, %f239;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f255, %f27, %f252, %f240;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f256, %f28, %f252, %f241;
	.loc	6	454	0
	// ---- tap (0,3): fetch at (bx, by+3); %f258 = by+3 is reused for this row ----
	mov.f32 	%f257, 0f40400000;   	// 3
	add.ftz.f32 	%f258, %f14, %f257;
	mov.f32 	%f259, %f12;
	mov.f32 	%f260, %f258;
	mov.f32 	%f261, 0f00000000;   	// 0
	mov.f32 	%f262, %f261;
	mov.f32 	%f263, 0f00000000;   	// 0
	mov.f32 	%f264, %f263;
	tex.2d.v4.f32.f32 {%f265,%f266,%f267,%f268},[sPointTexture,{%f259,%f260,%f262,%f264}];
	.loc	6	397	0
	mov.f32 	%f25, %f265;
	mov.f32 	%f26, %f266;
	mov.f32 	%f27, %f267;
	mov.f32 	%f28, %f268;
	.loc	6	53	0
	// wy3 = W0(1-fy) -> %f274; tap weight %f275 = wx0*wy3.
	mov.f32 	%f269, 0f3f99999a;   	// 1.2
	mov.f32 	%f270, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f271, %f270, %f200, %f269;
	mov.f32 	%f272, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f273, %f200, %f271, %f272;
	mul.ftz.f32 	%f274, %f200, %f273;
	mul.ftz.f32 	%f275, %f41, %f274;
	fma.rn.ftz.f32 	%f276, %f25, %f275, %f253;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f277, %f26, %f275, %f254;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f278, %f27, %f275, %f255;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f279, %f28, %f275, %f256;
	.loc	6	455	0
	// ---- tap (1,3): fetch at (bx+1, by+3) ----
	mov.f32 	%f280, %f49;
	mov.f32 	%f281, %f258;
	mov.f32 	%f282, 0f00000000;   	// 0
	mov.f32 	%f283, %f282;
	mov.f32 	%f284, 0f00000000;   	// 0
	mov.f32 	%f285, %f284;
	tex.2d.v4.f32.f32 {%f286,%f287,%f288,%f289},[sPointTexture,{%f280,%f281,%f283,%f285}];
	.loc	6	397	0
	mov.f32 	%f25, %f286;
	mov.f32 	%f26, %f287;
	mov.f32 	%f27, %f288;
	mov.f32 	%f28, %f289;
	.loc	6	53	0
	// tap weight %f290 = wx1*wy3
	mul.ftz.f32 	%f290, %f65, %f274;
	fma.rn.ftz.f32 	%f291, %f25, %f290, %f276;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f292, %f26, %f290, %f277;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f293, %f27, %f290, %f278;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f294, %f28, %f290, %f279;
	.loc	6	456	0
	// ---- tap (2,3): fetch at (bx+2, by+3) ----
	mov.f32 	%f295, %f72;
	mov.f32 	%f296, %f258;
	mov.f32 	%f297, 0f00000000;   	// 0
	mov.f32 	%f298, %f297;
	mov.f32 	%f299, 0f00000000;   	// 0
	mov.f32 	%f300, %f299;
	tex.2d.v4.f32.f32 {%f301,%f302,%f303,%f304},[sPointTexture,{%f295,%f296,%f298,%f300}];
	.loc	6	397	0
	mov.f32 	%f25, %f301;
	mov.f32 	%f26, %f302;
	mov.f32 	%f27, %f303;
	mov.f32 	%f28, %f304;
	.loc	6	53	0
	// tap weight %f305 = wx2*wy3
	mul.ftz.f32 	%f305, %f90, %f274;
	fma.rn.ftz.f32 	%f306, %f25, %f305, %f291;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f307, %f26, %f305, %f292;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f308, %f27, %f305, %f293;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f309, %f28, %f305, %f294;
	.loc	6	457	0
	// ---- tap (3,3): fetch at (bx+3, by+3) ----
	mov.f32 	%f310, %f97;
	mov.f32 	%f311, %f258;
	mov.f32 	%f312, 0f00000000;   	// 0
	mov.f32 	%f313, %f312;
	mov.f32 	%f314, 0f00000000;   	// 0
	mov.f32 	%f315, %f314;
	tex.2d.v4.f32.f32 {%f316,%f317,%f318,%f319},[sPointTexture,{%f310,%f311,%f313,%f315}];
	.loc	6	397	0
	mov.f32 	%f25, %f316;
	mov.f32 	%f26, %f317;
	mov.f32 	%f27, %f318;
	mov.f32 	%f28, %f319;
	.loc	6	459	0
	// Last tap weight %f320 = wx3*wy3; the final FMAs produce the four results,
	// each stored straight into the return parameter.
	mul.ftz.f32 	%f320, %f113, %f274;
	fma.rn.ftz.f32 	%f321, %f25, %f320, %f306;
	st.param.f32 	[__cudaretf__Z12bicubicTex2Dff+0], %f321;
	fma.rn.ftz.f32 	%f322, %f26, %f320, %f307;
	st.param.f32 	[__cudaretf__Z12bicubicTex2Dff+4], %f322;
	fma.rn.ftz.f32 	%f323, %f27, %f320, %f308;
	st.param.f32 	[__cudaretf__Z12bicubicTex2Dff+8], %f323;
	fma.rn.ftz.f32 	%f324, %f28, %f320, %f309;
	st.param.f32 	[__cudaretf__Z12bicubicTex2Dff+12], %f324;
	ret;
$LDWend__Z12bicubicTex2Dff:
	} // _Z12bicubicTex2Dff

	// _Z9ComputeUV6float3S_S_S_ff
	//   (demangles to ComputeUV(float3, float3, float3, float3, float, float))
	//
	// Parameter names are not visible in the PTX; the comments use this notation:
	//   A  = param 1 (3 x f32)      B  = param 2 (3 x f32)
	//   E1 = param 3 (3 x f32)      E2 = param 4 (3 x f32)
	//   px = param 5 (f32)          py = param 6 (f32)
	//
	// Computation:
	//   D   = normalize( (px - A.x, py - A.y, -A.z) )
	//   P   = D x E2
	//   det = E1 . P
	//   T   = A - B
	//   u   = (T . P) / det                    -> return bytes 0..3
	//   v   = (D . (T x E1)) / det             -> return bytes 4..7
	// The normalisation uses rsqrt.approx and the division uses rcp.approx, so the
	// results are approximate. No guard against a zero-length direction or
	// det == 0 is present in this function.
	// NOTE(review): this has the shape of a Moller-Trumbore style ray/plane
	// parameter solve (ray from A toward (px, py, 0); plane origin B with edge
	// vectors E1, E2) -- confirm the intended geometry with the CUDA source.
	.visible .func (.param .align 8 .b8 __cudaretf__Z9ComputeUV6float3S_S_S_ff[8]) _Z9ComputeUV6float3S_S_S_ff (.param .align 4 .b8 __cudaparmf1__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf2__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf3__Z9ComputeUV6float3S_S_S_ff[12], .param .align 4 .b8 __cudaparmf4__Z9ComputeUV6float3S_S_S_ff[12], .param .f32 __cudaparmf5__Z9ComputeUV6float3S_S_S_ff, .param .f32 __cudaparmf6__Z9ComputeUV6float3S_S_S_ff)
	{
	.reg .f32 %f<76>;
	.loc	6	465	0
$LDWbegin__Z9ComputeUV6float3S_S_S_ff:
	// A = (%f2, %f4, %f6)
	ld.param.f32 	%f1, [__cudaparmf1__Z9ComputeUV6float3S_S_S_ff+0];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf1__Z9ComputeUV6float3S_S_S_ff+4];
	mov.f32 	%f4, %f3;
	ld.param.f32 	%f5, [__cudaparmf1__Z9ComputeUV6float3S_S_S_ff+8];
	mov.f32 	%f6, %f5;
	// B = (%f8, %f10, %f12)
	ld.param.f32 	%f7, [__cudaparmf2__Z9ComputeUV6float3S_S_S_ff+0];
	mov.f32 	%f8, %f7;
	ld.param.f32 	%f9, [__cudaparmf2__Z9ComputeUV6float3S_S_S_ff+4];
	mov.f32 	%f10, %f9;
	ld.param.f32 	%f11, [__cudaparmf2__Z9ComputeUV6float3S_S_S_ff+8];
	mov.f32 	%f12, %f11;
	// E1 = (%f14, %f16, %f18)
	ld.param.f32 	%f13, [__cudaparmf3__Z9ComputeUV6float3S_S_S_ff+0];
	mov.f32 	%f14, %f13;
	ld.param.f32 	%f15, [__cudaparmf3__Z9ComputeUV6float3S_S_S_ff+4];
	mov.f32 	%f16, %f15;
	ld.param.f32 	%f17, [__cudaparmf3__Z9ComputeUV6float3S_S_S_ff+8];
	mov.f32 	%f18, %f17;
	// E2 = (%f20, %f22, %f24)
	ld.param.f32 	%f19, [__cudaparmf4__Z9ComputeUV6float3S_S_S_ff+0];
	mov.f32 	%f20, %f19;
	ld.param.f32 	%f21, [__cudaparmf4__Z9ComputeUV6float3S_S_S_ff+4];
	mov.f32 	%f22, %f21;
	ld.param.f32 	%f23, [__cudaparmf4__Z9ComputeUV6float3S_S_S_ff+8];
	mov.f32 	%f24, %f23;
	// px = %f26, py = %f28
	ld.param.f32 	%f25, [__cudaparmf5__Z9ComputeUV6float3S_S_S_ff];
	mov.f32 	%f26, %f25;
	ld.param.f32 	%f27, [__cudaparmf6__Z9ComputeUV6float3S_S_S_ff];
	mov.f32 	%f28, %f27;
	.loc	6	478	0
	// Unnormalised direction d = (px - A.x, py - A.y, -A.z) and 1/|d| in %f35.
	sub.ftz.f32 	%f29, %f28, %f4;
	sub.ftz.f32 	%f30, %f26, %f2;
	neg.ftz.f32 	%f31, %f6;
	mul.ftz.f32 	%f32, %f29, %f29;
	fma.rn.ftz.f32 	%f33, %f30, %f30, %f32;
	fma.rn.ftz.f32 	%f34, %f31, %f31, %f33;
	rsqrt.approx.ftz.f32 	%f35, %f34;
	// D.x = %f36, D.y = %f38. %f37 holds -D.z (A.z / |d|); D.z itself is %f40.
	mul.ftz.f32 	%f36, %f30, %f35;
	mul.ftz.f32 	%f37, %f35, %f6;
	mul.ftz.f32 	%f38, %f29, %f35;
	// P = D x E2:  P.x = %f49, P.y = %f44, P.z = %f47
	mul.ftz.f32 	%f39, %f36, %f24;
	neg.ftz.f32 	%f40, %f37;
	mul.ftz.f32 	%f41, %f37, %f22;
	mul.ftz.f32 	%f42, %f38, %f20;
	mul.ftz.f32 	%f43, %f20, %f40;
	sub.ftz.f32 	%f44, %f43, %f39;
	neg.ftz.f32 	%f45, %f41;
	mul.ftz.f32 	%f46, %f36, %f22;
	sub.ftz.f32 	%f47, %f46, %f42;
	mul.ftz.f32 	%f48, %f38, %f24;
	sub.ftz.f32 	%f49, %f48, %f45;
	// det = E1 . P (%f52); %f53 = approximate 1/det, shared by u and v.
	mul.ftz.f32 	%f50, %f44, %f16;
	fma.rn.ftz.f32 	%f51, %f49, %f14, %f50;
	fma.rn.ftz.f32 	%f52, %f47, %f18, %f51;
	rcp.approx.ftz.f32 	%f53, %f52;
	.loc	6	481	0
	// T = A - B:  T.x = %f55, T.y = %f54, T.z = %f56;  u = (T . P) / det
	sub.ftz.f32 	%f54, %f4, %f10;
	sub.ftz.f32 	%f55, %f2, %f8;
	sub.ftz.f32 	%f56, %f6, %f12;
	mul.ftz.f32 	%f57, %f54, %f44;
	fma.rn.ftz.f32 	%f58, %f49, %f55, %f57;
	fma.rn.ftz.f32 	%f59, %f47, %f56, %f58;
	mul.ftz.f32 	%f60, %f53, %f59;
	.loc	6	486	0
	mov.f32 	%f61, %f60;
	st.param.f32 	[__cudaretf__Z9ComputeUV6float3S_S_S_ff+0], %f61;
	// Q = T x E1:  Q.y = %f64, Q.x = %f68, Q.z = %f72;  v = (D . Q) / det
	mul.ftz.f32 	%f62, %f55, %f18;
	mul.ftz.f32 	%f63, %f14, %f56;
	sub.ftz.f32 	%f64, %f63, %f62;
	mul.ftz.f32 	%f65, %f38, %f64;
	mul.ftz.f32 	%f66, %f56, %f16;
	mul.ftz.f32 	%f67, %f54, %f18;
	sub.ftz.f32 	%f68, %f67, %f66;
	fma.rn.ftz.f32 	%f69, %f68, %f36, %f65;
	mul.ftz.f32 	%f70, %f54, %f14;
	mul.ftz.f32 	%f71, %f55, %f16;
	sub.ftz.f32 	%f72, %f71, %f70;
	fma.rn.ftz.f32 	%f73, %f72, %f40, %f69;
	mul.ftz.f32 	%f74, %f53, %f73;
	st.param.f32 	[__cudaretf__Z9ComputeUV6float3S_S_S_ff+4], %f74;
	ret;
$LDWend__Z9ComputeUV6float3S_S_S_ff:
	} // _Z9ComputeUV6float3S_S_S_ff

	// _Z24CubicInterpolationKernelff
	//   (demangles to CubicInterpolationKernel(float, float))
	//
	// Evaluates a piecewise cubic in r = |x| with shape parameter a:
	//   r >= 2      : 0
	//   1 <= r < 2  : a*r^3 - 5a*r^2 + 8a*r - 4a
	//   otherwise   : (a+2)*r^3 - (a+3)*r^2 + 1
	//   param 1 (f32): x   (only its absolute value is used)
	//   param 2 (f32): a
	//   return  (f32): kernel weight
	// NOTE(review): this is the form of the standard cubic-convolution
	// interpolation kernel; confirm the parameter meaning with the CUDA source.
	.visible .func (.param .f32 __cudaretf__Z24CubicInterpolationKernelff) _Z24CubicInterpolationKernelff (.param .f32 __cudaparmf1__Z24CubicInterpolationKernelff, .param .f32 __cudaparmf2__Z24CubicInterpolationKernelff)
	{
	.reg .f32 %f<28>;
	.reg .pred %p<4>;
	.loc	6	608	0
$LDWbegin__Z24CubicInterpolationKernelff:
	ld.param.f32 	%f1, [__cudaparmf1__Z24CubicInterpolationKernelff];
	mov.f32 	%f2, %f1;
	ld.param.f32 	%f3, [__cudaparmf2__Z24CubicInterpolationKernelff];
	mov.f32 	%f4, %f3;
	// r = |x| (%f5). Branch 1: r >= 2 yields 0.
	abs.ftz.f32 	%f5, %f2;
	mov.f32 	%f6, 0f40000000;     	// 2
	setp.ge.ftz.f32 	%p1, %f5, %f6;
	@!%p1 bra 	$Lt_95_2050;
	mov.f32 	%f7, 0f00000000;     	// 0
	bra.uni 	$Lt_95_1794;
$Lt_95_2050:
	// Branch 2: 1 <= r < 2.
	mov.f32 	%f8, 0f3f800000;     	// 1
	setp.ge.ftz.f32 	%p2, %f5, %f8;
	@!%p2 bra 	$Lt_95_2562;
	.loc	6	620	0
	// result = r*(r*(a*(r - 5)) + 8a) - 4a
	mov.f32 	%f9, 0f40800000;     	// 4
	mul.ftz.f32 	%f10, %f4, %f9;
	mov.f32 	%f11, 0f41000000;    	// 8
	mul.ftz.f32 	%f12, %f4, %f11;
	mov.f32 	%f13, 0fc0a00000;    	// -5
	add.ftz.f32 	%f14, %f5, %f13;
	mul.ftz.f32 	%f15, %f4, %f14;
	fma.rn.ftz.f32 	%f16, %f5, %f15, %f12;
	mul.ftz.f32 	%f17, %f5, %f16;
	sub.ftz.f32 	%f7, %f17, %f10;
	bra.uni 	$Lt_95_2306;
$Lt_95_2562:
	.loc	6	624	0
	// Branch 3 (neither comparison held): result = r*(r*(r*(a+2) - (a+3))) + 1
	mov.f32 	%f18, 0f3f800000;    	// 1
	mov.f32 	%f19, 0f40400000;    	// 3
	add.ftz.f32 	%f20, %f4, %f19;
	mov.f32 	%f21, 0f40000000;    	// 2
	add.ftz.f32 	%f22, %f4, %f21;
	mul.ftz.f32 	%f23, %f5, %f22;
	sub.ftz.f32 	%f24, %f23, %f20;
	mul.ftz.f32 	%f25, %f5, %f24;
	fma.rn.ftz.f32 	%f7, %f5, %f25, %f18;
$Lt_95_2306:
$Lt_95_1794:
	.loc	6	627	0
	// All three branches leave their result in %f7.
	mov.f32 	%f26, %f7;
	st.param.f32 	[__cudaretf__Z24CubicInterpolationKernelff], %f26;
	ret;
$LDWend__Z24CubicInterpolationKernelff:
	} // _Z24CubicInterpolationKernelff

	.entry BlendMode_IR_BlendMode_Normal_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<131>;
	.reg .pred %p<15>;
	.loc	22	468	0
$LDWbegin_BlendMode_IR_BlendMode_Normal_Kernel:
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_96_19714;
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrc0];
	@!%p2 bra 	$Lt_96_20482;
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	468	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_96_20226;
$Lt_96_20482:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_96_20226:
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inSrc1];
	@!%p2 bra 	$Lt_96_20994;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	468	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_96_20738;
$Lt_96_20994:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_96_20738:
	.loc	5	255	0
	mov.f32 	%f9, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f1, %f9;
	@!%p3 bra 	$Lt_96_21250;
	.loc	5	234	0
	neg.ftz.f32 	%f10, %f1;
	lg2.approx.ftz.f32 	%f11, %f10;
	mov.f32 	%f12, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f13, %f11, %f12;
	ex2.approx.ftz.f32 	%f14, %f13;
	neg.ftz.f32 	%f15, %f14;
	bra.uni 	$LDWendi___log2f_273_17;
$Lt_96_21250:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f16, %f1;
	mov.f32 	%f17, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f18, %f16, %f17;
	ex2.approx.ftz.f32 	%f15, %f18;
$LDWendi___log2f_273_17:
	.loc	5	256	0
	mov.f32 	%f19, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f19;
	@!%p4 bra 	$Lt_96_21762;
	.loc	5	234	0
	neg.ftz.f32 	%f20, %f2;
	lg2.approx.ftz.f32 	%f21, %f20;
	mov.f32 	%f22, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f23, %f21, %f22;
	ex2.approx.ftz.f32 	%f24, %f23;
	neg.ftz.f32 	%f25, %f24;
	bra.uni 	$LDWendi___log2f_273_15;
$Lt_96_21762:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f26, %f2;
	mov.f32 	%f27, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f28, %f26, %f27;
	ex2.approx.ftz.f32 	%f25, %f28;
$LDWendi___log2f_273_15:
	.loc	5	257	0
	mov.f32 	%f29, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f3, %f29;
	@!%p5 bra 	$Lt_96_22274;
	.loc	5	234	0
	neg.ftz.f32 	%f30, %f3;
	lg2.approx.ftz.f32 	%f31, %f30;
	mov.f32 	%f32, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f33, %f31, %f32;
	ex2.approx.ftz.f32 	%f34, %f33;
	neg.ftz.f32 	%f35, %f34;
	bra.uni 	$LDWendi___log2f_273_13;
$Lt_96_22274:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f36, %f3;
	mov.f32 	%f37, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f38, %f36, %f37;
	ex2.approx.ftz.f32 	%f35, %f38;
$LDWendi___log2f_273_13:
	.loc	22	83	0
	cvt.ftz.sat.f32.f32 	%f39, %f4;
	cvt.ftz.sat.f32.f32 	%f40, %f8;
	ld.param.f32 	%f41, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inAlphaGain];
	mul.ftz.f32 	%f42, %f41, %f39;
	cvt.ftz.sat.f32.f32 	%f43, %f42;
	mov.f32 	%f44, 0f3f800000;    	// 1
	sub.ftz.f32 	%f45, %f44, %f43;
	mul.ftz.f32 	%f46, %f40, %f45;
	add.ftz.f32 	%f47, %f43, %f46;
	mov.f32 	%f48, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f49, %f47, %f48;
	mov.f32 	%f50, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p6, %f49, %f50;
	@!%p6 bra 	$Lt_96_23042;
	mov.f32 	%f51, 0f00000000;    	// 0
	mov.f32 	%f52, 0f00000000;    	// 0
	mov.f32 	%f53, 0f00000000;    	// 0
	mov.f32 	%f54, 0f00000000;    	// 0
	bra.uni 	$Lt_96_22786;
$Lt_96_23042:
	mov.f32 	%f55, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p7, %f5, %f55;
	@!%p7 bra 	$Lt_96_23298;
	.loc	5	234	0
	neg.ftz.f32 	%f56, %f5;
	lg2.approx.ftz.f32 	%f57, %f56;
	mov.f32 	%f58, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f59, %f57, %f58;
	ex2.approx.ftz.f32 	%f60, %f59;
	neg.ftz.f32 	%f61, %f60;
	bra.uni 	$LDWendi___log2f_273_11;
$Lt_96_23298:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f62, %f5;
	mov.f32 	%f63, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f64, %f62, %f63;
	ex2.approx.ftz.f32 	%f61, %f64;
$LDWendi___log2f_273_11:
	.loc	22	97	0
	mov.f32 	%f65, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f6, %f65;
	@!%p8 bra 	$Lt_96_23810;
	.loc	5	234	0
	neg.ftz.f32 	%f66, %f6;
	lg2.approx.ftz.f32 	%f67, %f66;
	mov.f32 	%f68, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f69, %f67, %f68;
	ex2.approx.ftz.f32 	%f70, %f69;
	neg.ftz.f32 	%f71, %f70;
	bra.uni 	$LDWendi___log2f_273_9;
$Lt_96_23810:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f72, %f6;
	mov.f32 	%f73, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f74, %f72, %f73;
	ex2.approx.ftz.f32 	%f71, %f74;
$LDWendi___log2f_273_9:
	.loc	22	98	0
	mov.f32 	%f75, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p9, %f7, %f75;
	@!%p9 bra 	$Lt_96_24322;
	.loc	5	234	0
	neg.ftz.f32 	%f76, %f7;
	lg2.approx.ftz.f32 	%f77, %f76;
	mov.f32 	%f78, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f79, %f77, %f78;
	ex2.approx.ftz.f32 	%f80, %f79;
	neg.ftz.f32 	%f81, %f80;
	bra.uni 	$LDWendi___log2f_273_7;
$Lt_96_24322:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f82, %f7;
	mov.f32 	%f83, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f84, %f82, %f83;
	ex2.approx.ftz.f32 	%f81, %f84;
$LDWendi___log2f_273_7:
	.loc	5	208	0
	cvt.ftz.sat.f32.f32 	%f85, %f47;
	mov.f32 	%f86, %f85;
	mov.f32 	%f87, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f88, %f85, %f87;
	mov.f32 	%f89, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p10, %f88, %f89;
	@%p10 bra 	$Lt_96_25090;
	.loc	5	213	0
	rcp.approx.ftz.f32 	%f90, %f85;
	mul.ftz.f32 	%f91, %f35, %f43;
	fma.rn.ftz.f32 	%f92, %f46, %f81, %f91;
	mul.ftz.f32 	%f93, %f90, %f92;
	.loc	5	214	0
	mul.ftz.f32 	%f94, %f25, %f43;
	fma.rn.ftz.f32 	%f95, %f46, %f71, %f94;
	mul.ftz.f32 	%f96, %f90, %f95;
	.loc	5	215	0
	mul.ftz.f32 	%f97, %f15, %f43;
	fma.rn.ftz.f32 	%f98, %f46, %f61, %f97;
	mul.ftz.f32 	%f99, %f90, %f98;
	bra.uni 	$Lt_96_24834;
$Lt_96_25090:
	.loc	5	219	0
	mov.f32 	%f93, 0f00000000;    	// 0
	mov.f32 	%f96, 0f00000000;    	// 0
	mov.f32 	%f99, 0f00000000;    	// 0
	mov.f32 	%f86, 0f00000000;    	// 0
$Lt_96_24834:
	.loc	5	266	0
	mov.f32 	%f100, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p11, %f99, %f100;
	@!%p11 bra 	$Lt_96_25346;
	.loc	5	242	0
	neg.ftz.f32 	%f101, %f99;
	lg2.approx.ftz.f32 	%f102, %f101;
	mov.f32 	%f103, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f104, %f102, %f103;
	ex2.approx.ftz.f32 	%f105, %f104;
	neg.ftz.f32 	%f106, %f105;
	bra.uni 	$LDWendi___log2f_273_5;
$Lt_96_25346:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f107, %f99;
	mov.f32 	%f108, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f109, %f107, %f108;
	ex2.approx.ftz.f32 	%f106, %f109;
$LDWendi___log2f_273_5:
	.loc	5	267	0
	mov.f32 	%f110, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p12, %f96, %f110;
	@!%p12 bra 	$Lt_96_25858;
	.loc	5	242	0
	neg.ftz.f32 	%f111, %f96;
	lg2.approx.ftz.f32 	%f112, %f111;
	mov.f32 	%f113, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f114, %f112, %f113;
	ex2.approx.ftz.f32 	%f115, %f114;
	neg.ftz.f32 	%f116, %f115;
	bra.uni 	$LDWendi___log2f_273_3;
$Lt_96_25858:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f117, %f96;
	mov.f32 	%f118, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f119, %f117, %f118;
	ex2.approx.ftz.f32 	%f116, %f119;
$LDWendi___log2f_273_3:
	.loc	5	268	0
	mov.f32 	%f120, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p13, %f93, %f120;
	@!%p13 bra 	$Lt_96_26370;
	.loc	5	242	0
	neg.ftz.f32 	%f121, %f93;
	lg2.approx.ftz.f32 	%f122, %f121;
	mov.f32 	%f123, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f124, %f122, %f123;
	ex2.approx.ftz.f32 	%f125, %f124;
	neg.ftz.f32 	%f126, %f125;
	bra.uni 	$LDWendi___log2f_273_1;
$Lt_96_26370:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f127, %f93;
	mov.f32 	%f128, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f129, %f127, %f128;
	ex2.approx.ftz.f32 	%f126, %f129;
$LDWendi___log2f_273_1:
	.loc	22	101	0
	mov.f32 	%f54, %f106;
	mov.f32 	%f53, %f116;
	mov.f32 	%f52, %f126;
	mov.f32 	%f51, %f86;
$Lt_96_22786:
	.loc	22	468	0
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Normal_Kernel_inDest];
	@!%p2 bra 	$Lt_96_27138;
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f54;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f53;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f52;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f51;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	468	0
	bra.uni 	$Lt_96_26882;
$Lt_96_27138:
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f54,%f53,%f52,%f51};
$Lt_96_26882:
$Lt_96_19714:
	.loc	22	468	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Normal_Kernel:
	} // BlendMode_IR_BlendMode_Normal_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Darken_Kernel
	//
	// One thread handles one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y.  Threads for which (inWidth > x) and
	// (inHeight > y) do not both hold branch straight to exit.
	//
	// Each thread loads one 4-component pixel s0 from inSrc0 and one pixel s1
	// from inSrc1 at element index x + pitch*y (pitches are therefore counted
	// in pixels, not bytes), blends them, and stores one pixel to inDest.
	// inDeviceFormat == 0 selects 4 x f16 pixels (8 bytes each); any other
	// value selects 4 x f32 pixels (16 bytes each).
	//
	// Blend, with c = components 0..2 and a = component 3 (presumably alpha,
	// judging by the inAlphaGain parameter name):
	//   a0   = inAlphaGain * s0.a
	//   aOut = a0 + s1.a - a0*s1.a
	//   if (aOut - 8e-6 <= 0)  dest = (0, 0, 0, 0)
	//   else
	//     t      = a0 * rcp.approx(aOut)
	//     dest.c = s1.c*(1 - t) + t*(s0.c*(1 - s1.a) + s1.a*min(s0.c, s1.c))
	//     dest.a = aOut
	// All float arithmetic uses the .ftz (flush-to-zero) instruction forms.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Darken_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<41>;
	.reg .pred %p<5>;
	.loc	22	469	0
$LDWbegin_BlendMode_IR_BlendMode_Darken_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check.  set.gt writes 0xFFFFFFFF when true and neg turns that
	// into 1, so %r17 = (inWidth > x) & (inHeight > y) with signed compares.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: skip all loads and stores.
	@%p1 bra 	$Lt_97_21250;
	// %p2 = (inDeviceFormat == 0): true -> f16 pixels, false -> f32 pixels.
	// %p2 is reused for both loads and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0*y, computed in 32 bits and then sign-extended.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrc0];
	@!%p2 bra 	$Lt_97_22018;
	.loc	21	115	0
	// f16 path for s0: 8 bytes per pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	469	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_97_21762;
$Lt_97_22018:
	// f32 path for s0: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_97_21762:
	// %rd7 = x + inSrcPitch1*y; s1 is loaded into %f5..%f8 the same way.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inSrc1];
	@!%p2 bra 	$Lt_97_22530;
	.loc	21	115	0
	// f16 path for s1.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	469	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_97_22274;
$Lt_97_22530:
	// f32 path for s1.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_97_22274:
	// %f10 = a0 = inAlphaGain * s0.a
	// %f13 = aOut = a0 + s1.a - a0*s1.a ; %f14 carries the value stored as dest.a.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// Guard the reciprocal below: treat aOut - 8e-6 <= 0 as "no coverage".
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_97_23042;
	// Degenerate case: all four output components are zero.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_97_22786;
$Lt_97_23042:
	// %f22 = 1 - s1.a
	// %f24 = t = a0 * rcp.approx(aOut)
	// %f27 = 1 - t   (%f26 is the same product as %f24, computed a second time)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: %f20 = s1.c*(1-t) + t*(s0.c*(1-s1.a) + s1.a*min(s0.c, s1.c))
	min.ftz.f32 	%f28, %f1, %f5;
	mul.ftz.f32 	%f29, %f8, %f28;
	fma.rn.ftz.f32 	%f30, %f1, %f22, %f29;
	mul.ftz.f32 	%f31, %f24, %f30;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f31;
	// Component 1: same formula -> %f19.
	min.ftz.f32 	%f32, %f2, %f6;
	mul.ftz.f32 	%f33, %f8, %f32;
	fma.rn.ftz.f32 	%f34, %f2, %f22, %f33;
	mul.ftz.f32 	%f35, %f24, %f34;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f35;
	// Component 2: same formula -> %f18.
	min.ftz.f32 	%f36, %f3, %f7;
	mul.ftz.f32 	%f37, %f8, %f36;
	fma.rn.ftz.f32 	%f38, %f3, %f22, %f37;
	mul.ftz.f32 	%f39, %f24, %f38;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f39;
$Lt_97_22786:
	// %rd13 = x + inDestPitch*y; output pixel is (%f20, %f19, %f18, %f14).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Darken_Kernel_inDest];
	@!%p2 bra 	$Lt_97_23554;
	.loc	21	126	0
	// f16 store: narrow each component with round-to-nearest, 8 bytes per pixel.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	469	0
	bra.uni 	$Lt_97_23298;
$Lt_97_23554:
	.loc	21	126	0
	// f32 store: 16 bytes per pixel, no conversion.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_97_23298:
$Lt_97_21250:
	.loc	22	469	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Darken_Kernel:
	} // BlendMode_IR_BlendMode_Darken_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Lighten_Kernel
	//
	// One thread handles one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y.  Threads for which (inWidth > x) and
	// (inHeight > y) do not both hold branch straight to exit.
	//
	// Each thread loads one 4-component pixel s0 from inSrc0 and one pixel s1
	// from inSrc1 at element index x + pitch*y (pitches are therefore counted
	// in pixels, not bytes), blends them, and stores one pixel to inDest.
	// inDeviceFormat == 0 selects 4 x f16 pixels (8 bytes each); any other
	// value selects 4 x f32 pixels (16 bytes each).
	//
	// Blend, with c = components 0..2 and a = component 3 (presumably alpha,
	// judging by the inAlphaGain parameter name):
	//   a0   = inAlphaGain * s0.a
	//   aOut = a0 + s1.a - a0*s1.a
	//   if (aOut - 8e-6 <= 0)  dest = (0, 0, 0, 0)
	//   else
	//     t      = a0 * rcp.approx(aOut)
	//     dest.c = s1.c*(1 - t) + t*(s0.c*(1 - s1.a) + s1.a*max(s0.c, s1.c))
	//     dest.a = aOut
	// All float arithmetic uses the .ftz (flush-to-zero) instruction forms.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Lighten_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<41>;
	.reg .pred %p<5>;
	.loc	22	470	0
$LDWbegin_BlendMode_IR_BlendMode_Lighten_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check.  set.gt writes 0xFFFFFFFF when true and neg turns that
	// into 1, so %r17 = (inWidth > x) & (inHeight > y) with signed compares.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: skip all loads and stores.
	@%p1 bra 	$Lt_98_21250;
	// %p2 = (inDeviceFormat == 0): true -> f16 pixels, false -> f32 pixels.
	// %p2 is reused for both loads and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0*y, computed in 32 bits and then sign-extended.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrc0];
	@!%p2 bra 	$Lt_98_22018;
	.loc	21	115	0
	// f16 path for s0: 8 bytes per pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	470	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_98_21762;
$Lt_98_22018:
	// f32 path for s0: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_98_21762:
	// %rd7 = x + inSrcPitch1*y; s1 is loaded into %f5..%f8 the same way.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inSrc1];
	@!%p2 bra 	$Lt_98_22530;
	.loc	21	115	0
	// f16 path for s1.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	470	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_98_22274;
$Lt_98_22530:
	// f32 path for s1.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_98_22274:
	// %f10 = a0 = inAlphaGain * s0.a
	// %f13 = aOut = a0 + s1.a - a0*s1.a ; %f14 carries the value stored as dest.a.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// Guard the reciprocal below: treat aOut - 8e-6 <= 0 as "no coverage".
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_98_23042;
	// Degenerate case: all four output components are zero.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_98_22786;
$Lt_98_23042:
	// %f22 = 1 - s1.a
	// %f24 = t = a0 * rcp.approx(aOut)
	// %f27 = 1 - t   (%f26 is the same product as %f24, computed a second time)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: %f20 = s1.c*(1-t) + t*(s0.c*(1-s1.a) + s1.a*max(s0.c, s1.c))
	max.ftz.f32 	%f28, %f1, %f5;
	mul.ftz.f32 	%f29, %f8, %f28;
	fma.rn.ftz.f32 	%f30, %f1, %f22, %f29;
	mul.ftz.f32 	%f31, %f24, %f30;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f31;
	// Component 1: same formula -> %f19.
	max.ftz.f32 	%f32, %f2, %f6;
	mul.ftz.f32 	%f33, %f8, %f32;
	fma.rn.ftz.f32 	%f34, %f2, %f22, %f33;
	mul.ftz.f32 	%f35, %f24, %f34;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f35;
	// Component 2: same formula -> %f18.
	max.ftz.f32 	%f36, %f3, %f7;
	mul.ftz.f32 	%f37, %f8, %f36;
	fma.rn.ftz.f32 	%f38, %f3, %f22, %f37;
	mul.ftz.f32 	%f39, %f24, %f38;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f39;
$Lt_98_22786:
	// %rd13 = x + inDestPitch*y; output pixel is (%f20, %f19, %f18, %f14).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Lighten_Kernel_inDest];
	@!%p2 bra 	$Lt_98_23554;
	.loc	21	126	0
	// f16 store: narrow each component with round-to-nearest, 8 bytes per pixel.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	470	0
	bra.uni 	$Lt_98_23298;
$Lt_98_23554:
	.loc	21	126	0
	// f32 store: 16 bytes per pixel, no conversion.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_98_23298:
$Lt_98_21250:
	.loc	22	470	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Lighten_Kernel:
	} // BlendMode_IR_BlendMode_Lighten_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Multiply_Kernel
	//
	// One thread handles one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y.  Threads for which (inWidth > x) and
	// (inHeight > y) do not both hold branch straight to exit.
	//
	// Each thread loads one 4-component pixel s0 from inSrc0 and one pixel s1
	// from inSrc1 at element index x + pitch*y (pitches are therefore counted
	// in pixels, not bytes), blends them, and stores one pixel to inDest.
	// inDeviceFormat == 0 selects 4 x f16 pixels (8 bytes each); any other
	// value selects 4 x f32 pixels (16 bytes each).
	//
	// Blend, with c = components 0..2 and a = component 3 (presumably alpha,
	// judging by the inAlphaGain parameter name):
	//   a0   = inAlphaGain * s0.a
	//   aOut = a0 + s1.a - a0*s1.a
	//   if (aOut - 8e-6 <= 0)  dest = (0, 0, 0, 0)
	//   else
	//     t      = a0 * rcp.approx(aOut)
	//     m      = min(max(s0.c * s1.c, 0), 1)
	//     dest.c = s1.c*(1 - t) + t*(s0.c*(1 - s1.a) + s1.a*m)
	//     dest.a = aOut
	// All float arithmetic uses the .ftz (flush-to-zero) instruction forms.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Multiply_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<53>;
	.reg .pred %p<5>;
	.loc	22	471	0
$LDWbegin_BlendMode_IR_BlendMode_Multiply_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check.  set.gt writes 0xFFFFFFFF when true and neg turns that
	// into 1, so %r17 = (inWidth > x) & (inHeight > y) with signed compares.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: skip all loads and stores.
	@%p1 bra 	$Lt_99_22786;
	// %p2 = (inDeviceFormat == 0): true -> f16 pixels, false -> f32 pixels.
	// %p2 is reused for both loads and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0*y, computed in 32 bits and then sign-extended.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrc0];
	@!%p2 bra 	$Lt_99_23554;
	.loc	21	115	0
	// f16 path for s0: 8 bytes per pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	471	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_99_23298;
$Lt_99_23554:
	// f32 path for s0: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_99_23298:
	// %rd7 = x + inSrcPitch1*y; s1 is loaded into %f5..%f8 the same way.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inSrc1];
	@!%p2 bra 	$Lt_99_24066;
	.loc	21	115	0
	// f16 path for s1.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	471	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_99_23810;
$Lt_99_24066:
	// f32 path for s1.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_99_23810:
	// %f10 = a0 = inAlphaGain * s0.a
	// %f13 = aOut = a0 + s1.a - a0*s1.a ; %f14 carries the value stored as dest.a.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// Guard the reciprocal below: treat aOut - 8e-6 <= 0 as "no coverage".
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_99_24578;
	// Degenerate case: all four output components are zero.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_99_24322;
$Lt_99_24578:
	// %f22 = 1 - s1.a
	// %f24 = t = a0 * rcp.approx(aOut)
	// %f27 = 1 - t   (%f26 is the same product as %f24, computed a second time)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: m = clamp(s0.c*s1.c, 0, 1) in %f32, then
	// %f20 = s1.c*(1-t) + t*(s0.c*(1-s1.a) + s1.a*m)
	mul.ftz.f32 	%f28, %f1, %f5;
	mov.f32 	%f29, 0f00000000;    	// 0
	max.ftz.f32 	%f30, %f28, %f29;
	mov.f32 	%f31, 0f3f800000;    	// 1
	min.ftz.f32 	%f32, %f30, %f31;
	mul.ftz.f32 	%f33, %f8, %f32;
	fma.rn.ftz.f32 	%f34, %f1, %f22, %f33;
	mul.ftz.f32 	%f35, %f24, %f34;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f35;
	// Component 1: same formula -> %f19.
	mul.ftz.f32 	%f36, %f2, %f6;
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f36, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mul.ftz.f32 	%f41, %f8, %f40;
	fma.rn.ftz.f32 	%f42, %f2, %f22, %f41;
	mul.ftz.f32 	%f43, %f24, %f42;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f43;
	// Component 2: same formula -> %f18.
	mul.ftz.f32 	%f44, %f3, %f7;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f46, %f47;
	mul.ftz.f32 	%f49, %f8, %f48;
	fma.rn.ftz.f32 	%f50, %f3, %f22, %f49;
	mul.ftz.f32 	%f51, %f24, %f50;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f51;
$Lt_99_24322:
	// %rd13 = x + inDestPitch*y; output pixel is (%f20, %f19, %f18, %f14).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Multiply_Kernel_inDest];
	@!%p2 bra 	$Lt_99_25090;
	.loc	21	126	0
	// f16 store: narrow each component with round-to-nearest, 8 bytes per pixel.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	471	0
	bra.uni 	$Lt_99_24834;
$Lt_99_25090:
	.loc	21	126	0
	// f32 store: 16 bytes per pixel, no conversion.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_99_24834:
$Lt_99_22786:
	.loc	22	471	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Multiply_Kernel:
	} // BlendMode_IR_BlendMode_Multiply_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Screen_Kernel
	//
	// One thread handles one pixel at x = ctaid.x*ntid.x + tid.x,
	// y = ctaid.y*ntid.y + tid.y.  Threads for which (inWidth > x) and
	// (inHeight > y) do not both hold branch straight to exit.
	//
	// Each thread loads one 4-component pixel s0 from inSrc0 and one pixel s1
	// from inSrc1 at element index x + pitch*y (pitches are therefore counted
	// in pixels, not bytes), blends them, and stores one pixel to inDest.
	// inDeviceFormat == 0 selects 4 x f16 pixels (8 bytes each); any other
	// value selects 4 x f32 pixels (16 bytes each).
	//
	// Blend, with c = components 0..2 and a = component 3 (presumably alpha,
	// judging by the inAlphaGain parameter name):
	//   a0   = inAlphaGain * s0.a
	//   aOut = a0 + s1.a - a0*s1.a
	//   if (aOut - 8e-6 <= 0)  dest = (0, 0, 0, 0)
	//   else
	//     t      = a0 * rcp.approx(aOut)
	//     p      = min(max(s1.c, 1e-7), 1)
	//     q      = min(max(s0.c, 1e-7), 1)
	//     m      = min(max(1 - (1 - p)*(1 - q), 0), 1)
	//     dest.c = s1.c*(1 - t) + t*(s0.c*(1 - s1.a) + s1.a*m)
	//     dest.a = aOut
	// All float arithmetic uses the .ftz (flush-to-zero) instruction forms.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Screen_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<95>;
	.reg .pred %p<5>;
	.loc	22	472	0
$LDWbegin_BlendMode_IR_BlendMode_Screen_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check.  set.gt writes 0xFFFFFFFF when true and neg turns that
	// into 1, so %r17 = (inWidth > x) & (inHeight > y) with signed compares.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: skip all loads and stores.
	@%p1 bra 	$Lt_100_28930;
	// %p2 = (inDeviceFormat == 0): true -> f16 pixels, false -> f32 pixels.
	// %p2 is reused for both loads and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0*y, computed in 32 bits and then sign-extended.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrc0];
	@!%p2 bra 	$Lt_100_29698;
	.loc	21	115	0
	// f16 path for s0: 8 bytes per pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	472	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_100_29442;
$Lt_100_29698:
	// f32 path for s0: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_100_29442:
	// %rd7 = x + inSrcPitch1*y; s1 is loaded into %f5..%f8 the same way.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inSrc1];
	@!%p2 bra 	$Lt_100_30210;
	.loc	21	115	0
	// f16 path for s1.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	472	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_100_29954;
$Lt_100_30210:
	// f32 path for s1.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_100_29954:
	// %f10 = a0 = inAlphaGain * s0.a
	// %f13 = aOut = a0 + s1.a - a0*s1.a ; %f14 carries the value stored as dest.a.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// Guard the reciprocal below: treat aOut - 8e-6 <= 0 as "no coverage".
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_100_30722;
	// Degenerate case: all four output components are zero.
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_100_30466;
$Lt_100_30722:
	// %f22 = 1 - s1.a
	// %f24 = t = a0 * rcp.approx(aOut)
	// %f27 = 1 - t   (%f26 is the same product as %f24, computed a second time)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0:
	//   %f34 = 1 - clamp(s1.c, 1e-7, 1) ; %f40 = 1 - clamp(s0.c, 1e-7, 1)
	//   %f46 = m = clamp(1 - %f34*%f40, 0, 1)
	//   %f20 = s1.c*(1-t) + t*(s0.c*(1-s1.a) + s1.a*m)
	mov.f32 	%f28, 0f3f800000;    	// 1
	mov.f32 	%f29, 0f3f800000;    	// 1
	mov.f32 	%f30, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f31, %f5, %f30;
	mov.f32 	%f32, 0f3f800000;    	// 1
	min.ftz.f32 	%f33, %f31, %f32;
	sub.ftz.f32 	%f34, %f29, %f33;
	mov.f32 	%f35, 0f3f800000;    	// 1
	mov.f32 	%f36, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f37, %f1, %f36;
	mov.f32 	%f38, 0f3f800000;    	// 1
	min.ftz.f32 	%f39, %f37, %f38;
	sub.ftz.f32 	%f40, %f35, %f39;
	mul.ftz.f32 	%f41, %f34, %f40;
	sub.ftz.f32 	%f42, %f28, %f41;
	mov.f32 	%f43, 0f00000000;    	// 0
	max.ftz.f32 	%f44, %f42, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	min.ftz.f32 	%f46, %f44, %f45;
	mul.ftz.f32 	%f47, %f8, %f46;
	fma.rn.ftz.f32 	%f48, %f1, %f22, %f47;
	mul.ftz.f32 	%f49, %f24, %f48;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f49;
	// Component 1: same formula -> %f19.
	mov.f32 	%f50, 0f3f800000;    	// 1
	mov.f32 	%f51, 0f3f800000;    	// 1
	mov.f32 	%f52, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f53, %f6, %f52;
	mov.f32 	%f54, 0f3f800000;    	// 1
	min.ftz.f32 	%f55, %f53, %f54;
	sub.ftz.f32 	%f56, %f51, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	mov.f32 	%f58, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f59, %f2, %f58;
	mov.f32 	%f60, 0f3f800000;    	// 1
	min.ftz.f32 	%f61, %f59, %f60;
	sub.ftz.f32 	%f62, %f57, %f61;
	mul.ftz.f32 	%f63, %f56, %f62;
	sub.ftz.f32 	%f64, %f50, %f63;
	mov.f32 	%f65, 0f00000000;    	// 0
	max.ftz.f32 	%f66, %f64, %f65;
	mov.f32 	%f67, 0f3f800000;    	// 1
	min.ftz.f32 	%f68, %f66, %f67;
	mul.ftz.f32 	%f69, %f8, %f68;
	fma.rn.ftz.f32 	%f70, %f2, %f22, %f69;
	mul.ftz.f32 	%f71, %f24, %f70;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f71;
	// Component 2: same formula -> %f18.
	mov.f32 	%f72, 0f3f800000;    	// 1
	mov.f32 	%f73, 0f3f800000;    	// 1
	mov.f32 	%f74, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f75, %f7, %f74;
	mov.f32 	%f76, 0f3f800000;    	// 1
	min.ftz.f32 	%f77, %f75, %f76;
	sub.ftz.f32 	%f78, %f73, %f77;
	mov.f32 	%f79, 0f3f800000;    	// 1
	mov.f32 	%f80, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f81, %f3, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f81, %f82;
	sub.ftz.f32 	%f84, %f79, %f83;
	mul.ftz.f32 	%f85, %f78, %f84;
	sub.ftz.f32 	%f86, %f72, %f85;
	mov.f32 	%f87, 0f00000000;    	// 0
	max.ftz.f32 	%f88, %f86, %f87;
	mov.f32 	%f89, 0f3f800000;    	// 1
	min.ftz.f32 	%f90, %f88, %f89;
	mul.ftz.f32 	%f91, %f8, %f90;
	fma.rn.ftz.f32 	%f92, %f3, %f22, %f91;
	mul.ftz.f32 	%f93, %f24, %f92;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f93;
$Lt_100_30466:
	// %rd13 = x + inDestPitch*y; output pixel is (%f20, %f19, %f18, %f14).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Screen_Kernel_inDest];
	@!%p2 bra 	$Lt_100_31234;
	.loc	21	126	0
	// f16 store: narrow each component with round-to-nearest, 8 bytes per pixel.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	472	0
	bra.uni 	$Lt_100_30978;
$Lt_100_31234:
	.loc	21	126	0
	// f32 store: 16 bytes per pixel, no conversion.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_100_30978:
$Lt_100_28930:
	.loc	22	472	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Screen_Kernel:
	} // BlendMode_IR_BlendMode_Screen_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_ColorBurn_Kernel
	//
	// One thread handles one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits immediately unless (inWidth > x) && (inHeight > y).
	//
	// Each pixel has four components.  When inDeviceFormat == 0 a pixel is
	// four 16-bit words (8 bytes) converted f16 <-> f32 on load/store;
	// otherwise it is four f32 values (16 bytes).  Pixel index in each
	// buffer is x + pitch * y, so the pitches are used in pixel units.
	//
	// With s0 = pixel from inSrc0, s1 = pixel from inSrc1 (components
	// 0..3; the fourth is used as the coverage term -- presumably alpha):
	//   a0   = inAlphaGain * s0[3]
	//   aOut = a0 + s1[3] - a0 * s1[3]
	//   if (aOut - 8e-6 <= 0)  -> all four outputs are 0
	//   else, for c in 0..2:
	//     burn   = clamp(1 - (1 - clamp(s1[c], 0, 1)) / clamp(s0[c], 1e-7, 1), 0, 1)
	//     mixed  = s0[c] * (1 - s1[3]) + s1[3] * burn
	//     out[c] = s1[c] * (1 - a0 / aOut) + (a0 / aOut) * mixed
	//     out[3] = aOut
	// NOTE(review): which input is the blend layer and which is the base is
	// not visible in this file -- confirm against the CUDA source.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_ColorBurn_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<89>;
	.reg .pred %p<5>;
	.loc	22	473	0
$LDWbegin_BlendMode_IR_BlendMode_ColorBurn_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set writes an all-ones mask when true, neg turns it
	// into 1; the AND is non-zero only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads skip straight to exit.
	@%p1 bra 	$Lt_101_28930;
	// %p2 = (inDeviceFormat == 0): selects the 16-bit-per-component path.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0 * y (pixel index into inSrc0, computed in 32 bits
	// and then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrc0];
	@!%p2 bra 	$Lt_101_29698;
	.loc	21	115	0
	// 16-bit path: 8 bytes per pixel, each word converted f16 -> f32
	// into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	473	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_101_29442;
$Lt_101_29698:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_101_29442:
	// Same load sequence for inSrc1; its pixel lands in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inSrc1];
	@!%p2 bra 	$Lt_101_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	473	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_101_29954;
$Lt_101_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_101_29954:
	// %f10 = a0 = inAlphaGain * s0[3];  %f13 = aOut = a0 + s1[3] - a0 * s1[3].
	// %f14 carries the fourth output component (aOut unless zeroed below).
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If aOut - 8e-6 <= 0 the result is forced to zero in all four
	// components, which also avoids the reciprocal of aOut below.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_101_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_101_30466;
$Lt_101_30722:
	// Terms shared by all three components:
	//   %f22 = 1 - s1[3],  %f24 = a0 / aOut (via approximate reciprocal),
	//   %f27 = 1 - a0 / aOut  (%f26 recomputes the same product as %f24).
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0:
	//   %f44 = clamp(1 - (1 - clamp(s1[0],0,1)) / clamp(s0[0],1e-7,1), 0, 1)
	// The 1e-7 lower bound keeps the divisor non-zero.
	mov.f32 	%f28, 0f3f800000;    	// 1
	mov.f32 	%f29, 0f3f800000;    	// 1
	mov.f32 	%f30, 0f00000000;    	// 0
	max.ftz.f32 	%f31, %f5, %f30;
	mov.f32 	%f32, 0f3f800000;    	// 1
	min.ftz.f32 	%f33, %f31, %f32;
	sub.ftz.f32 	%f34, %f29, %f33;
	mov.f32 	%f35, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f36, %f1, %f35;
	mov.f32 	%f37, 0f3f800000;    	// 1
	min.ftz.f32 	%f38, %f36, %f37;
	div.approx.ftz.f32 	%f39, %f34, %f38;
	sub.ftz.f32 	%f40, %f28, %f39;
	mov.f32 	%f41, 0f00000000;    	// 0
	max.ftz.f32 	%f42, %f40, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	min.ftz.f32 	%f44, %f42, %f43;
	// %f46 = s0[0] * (1 - s1[3]) + s1[3] * burn
	// %f20 = s1[0] * (1 - a0/aOut) + (a0/aOut) * %f46   (output component 0)
	mul.ftz.f32 	%f45, %f8, %f44;
	fma.rn.ftz.f32 	%f46, %f1, %f22, %f45;
	mul.ftz.f32 	%f47, %f24, %f46;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f47;
	// Component 1: same computation on s0[1] / s1[1]; result in %f19.
	mov.f32 	%f48, 0f3f800000;    	// 1
	mov.f32 	%f49, 0f3f800000;    	// 1
	mov.f32 	%f50, 0f00000000;    	// 0
	max.ftz.f32 	%f51, %f6, %f50;
	mov.f32 	%f52, 0f3f800000;    	// 1
	min.ftz.f32 	%f53, %f51, %f52;
	sub.ftz.f32 	%f54, %f49, %f53;
	mov.f32 	%f55, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f56, %f2, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	min.ftz.f32 	%f58, %f56, %f57;
	div.approx.ftz.f32 	%f59, %f54, %f58;
	sub.ftz.f32 	%f60, %f48, %f59;
	mov.f32 	%f61, 0f00000000;    	// 0
	max.ftz.f32 	%f62, %f60, %f61;
	mov.f32 	%f63, 0f3f800000;    	// 1
	min.ftz.f32 	%f64, %f62, %f63;
	mul.ftz.f32 	%f65, %f8, %f64;
	fma.rn.ftz.f32 	%f66, %f2, %f22, %f65;
	mul.ftz.f32 	%f67, %f24, %f66;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f67;
	// Component 2: same computation on s0[2] / s1[2]; result in %f18.
	mov.f32 	%f68, 0f3f800000;    	// 1
	mov.f32 	%f69, 0f3f800000;    	// 1
	mov.f32 	%f70, 0f00000000;    	// 0
	max.ftz.f32 	%f71, %f7, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f71, %f72;
	sub.ftz.f32 	%f74, %f69, %f73;
	mov.f32 	%f75, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f76, %f3, %f75;
	mov.f32 	%f77, 0f3f800000;    	// 1
	min.ftz.f32 	%f78, %f76, %f77;
	div.approx.ftz.f32 	%f79, %f74, %f78;
	sub.ftz.f32 	%f80, %f68, %f79;
	mov.f32 	%f81, 0f00000000;    	// 0
	max.ftz.f32 	%f82, %f80, %f81;
	mov.f32 	%f83, 0f3f800000;    	// 1
	min.ftz.f32 	%f84, %f82, %f83;
	mul.ftz.f32 	%f85, %f8, %f84;
	fma.rn.ftz.f32 	%f86, %f3, %f22, %f85;
	mul.ftz.f32 	%f87, %f24, %f86;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f87;
$Lt_101_30466:
	// Store {%f20, %f19, %f18, %f14} at pixel index x + inDestPitch * y in
	// inDest, using the same format switch as the loads.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_ColorBurn_Kernel_inDest];
	@!%p2 bra 	$Lt_101_31234;
	.loc	21	126	0
	// 16-bit path: round each component to f16 (round-to-nearest) and
	// store four 16-bit words.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	473	0
	bra.uni 	$Lt_101_30978;
$Lt_101_31234:
	.loc	21	126	0
	// f32 path: store the four components unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_101_30978:
$Lt_101_28930:
	.loc	22	473	0
	exit;
$LDWend_BlendMode_IR_BlendMode_ColorBurn_Kernel:
	} // BlendMode_IR_BlendMode_ColorBurn_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_LinearBurn_Kernel
	//
	// One thread handles one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits immediately unless (inWidth > x) && (inHeight > y).
	//
	// Each pixel has four components.  When inDeviceFormat == 0 a pixel is
	// four 16-bit words (8 bytes) converted f16 <-> f32 on load/store;
	// otherwise it is four f32 values (16 bytes).  Pixel index in each
	// buffer is x + pitch * y, so the pitches are used in pixel units.
	//
	// With s0 = pixel from inSrc0, s1 = pixel from inSrc1 (components
	// 0..3; the fourth is used as the coverage term -- presumably alpha):
	//   a0   = inAlphaGain * s0[3]
	//   aOut = a0 + s1[3] - a0 * s1[3]
	//   if (aOut - 8e-6 <= 0)  -> all four outputs are 0
	//   else, for c in 0..2:
	//     burn   = clamp(clamp(s0[c], 0, 1) + clamp(s1[c], 0, 1) - 1, 0, 1)
	//     mixed  = s0[c] * (1 - s1[3]) + s1[3] * burn
	//     out[c] = s1[c] * (1 - a0 / aOut) + (a0 / aOut) * mixed
	//     out[3] = aOut
	// NOTE(review): which input is the blend layer and which is the base is
	// not visible in this file -- confirm against the CUDA source.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_LinearBurn_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<83>;
	.reg .pred %p<5>;
	.loc	22	474	0
$LDWbegin_BlendMode_IR_BlendMode_LinearBurn_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set writes an all-ones mask when true, neg turns it
	// into 1; the AND is non-zero only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads skip straight to exit.
	@%p1 bra 	$Lt_102_28930;
	// %p2 = (inDeviceFormat == 0): selects the 16-bit-per-component path.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0 * y (pixel index into inSrc0, computed in 32 bits
	// and then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrc0];
	@!%p2 bra 	$Lt_102_29698;
	.loc	21	115	0
	// 16-bit path: 8 bytes per pixel, each word converted f16 -> f32
	// into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	474	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_102_29442;
$Lt_102_29698:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_102_29442:
	// Same load sequence for inSrc1; its pixel lands in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inSrc1];
	@!%p2 bra 	$Lt_102_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	474	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_102_29954;
$Lt_102_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_102_29954:
	// %f10 = a0 = inAlphaGain * s0[3];  %f13 = aOut = a0 + s1[3] - a0 * s1[3].
	// %f14 carries the fourth output component (aOut unless zeroed below).
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If aOut - 8e-6 <= 0 the result is forced to zero in all four
	// components, which also avoids the reciprocal of aOut below.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_102_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_102_30466;
$Lt_102_30722:
	// Terms shared by all three components:
	//   %f22 = 1 - s1[3],  %f24 = a0 / aOut (via approximate reciprocal),
	//   %f27 = 1 - a0 / aOut  (%f26 recomputes the same product as %f24).
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0:
	//   %f42 = clamp(clamp(s0[0],0,1) + clamp(s1[0],0,1) - 1, 0, 1)
	mov.f32 	%f28, 0f00000000;    	// 0
	max.ftz.f32 	%f29, %f1, %f28;
	mov.f32 	%f30, 0f3f800000;    	// 1
	min.ftz.f32 	%f31, %f29, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f5, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f33, %f34;
	add.ftz.f32 	%f36, %f31, %f35;
	mov.f32 	%f37, 0fbf800000;    	// -1
	add.ftz.f32 	%f38, %f36, %f37;
	mov.f32 	%f39, 0f00000000;    	// 0
	max.ftz.f32 	%f40, %f38, %f39;
	mov.f32 	%f41, 0f3f800000;    	// 1
	min.ftz.f32 	%f42, %f40, %f41;
	// %f44 = s0[0] * (1 - s1[3]) + s1[3] * burn
	// %f20 = s1[0] * (1 - a0/aOut) + (a0/aOut) * %f44   (output component 0)
	mul.ftz.f32 	%f43, %f8, %f42;
	fma.rn.ftz.f32 	%f44, %f1, %f22, %f43;
	mul.ftz.f32 	%f45, %f24, %f44;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f45;
	// Component 1: same computation on s0[1] / s1[1]; result in %f19.
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f2, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	mov.f32 	%f50, 0f00000000;    	// 0
	max.ftz.f32 	%f51, %f6, %f50;
	mov.f32 	%f52, 0f3f800000;    	// 1
	min.ftz.f32 	%f53, %f51, %f52;
	add.ftz.f32 	%f54, %f49, %f53;
	mov.f32 	%f55, 0fbf800000;    	// -1
	add.ftz.f32 	%f56, %f54, %f55;
	mov.f32 	%f57, 0f00000000;    	// 0
	max.ftz.f32 	%f58, %f56, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f58, %f59;
	mul.ftz.f32 	%f61, %f8, %f60;
	fma.rn.ftz.f32 	%f62, %f2, %f22, %f61;
	mul.ftz.f32 	%f63, %f24, %f62;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f63;
	// Component 2: same computation on s0[2] / s1[2]; result in %f18.
	mov.f32 	%f64, 0f00000000;    	// 0
	max.ftz.f32 	%f65, %f3, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f65, %f66;
	mov.f32 	%f68, 0f00000000;    	// 0
	max.ftz.f32 	%f69, %f7, %f68;
	mov.f32 	%f70, 0f3f800000;    	// 1
	min.ftz.f32 	%f71, %f69, %f70;
	add.ftz.f32 	%f72, %f67, %f71;
	mov.f32 	%f73, 0fbf800000;    	// -1
	add.ftz.f32 	%f74, %f72, %f73;
	mov.f32 	%f75, 0f00000000;    	// 0
	max.ftz.f32 	%f76, %f74, %f75;
	mov.f32 	%f77, 0f3f800000;    	// 1
	min.ftz.f32 	%f78, %f76, %f77;
	mul.ftz.f32 	%f79, %f8, %f78;
	fma.rn.ftz.f32 	%f80, %f3, %f22, %f79;
	mul.ftz.f32 	%f81, %f24, %f80;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f81;
$Lt_102_30466:
	// Store {%f20, %f19, %f18, %f14} at pixel index x + inDestPitch * y in
	// inDest, using the same format switch as the loads.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_LinearBurn_Kernel_inDest];
	@!%p2 bra 	$Lt_102_31234;
	.loc	21	126	0
	// 16-bit path: round each component to f16 (round-to-nearest) and
	// store four 16-bit words.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	474	0
	bra.uni 	$Lt_102_30978;
$Lt_102_31234:
	.loc	21	126	0
	// f32 path: store the four components unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_102_30978:
$Lt_102_28930:
	.loc	22	474	0
	exit;
$LDWend_BlendMode_IR_BlendMode_LinearBurn_Kernel:
	} // BlendMode_IR_BlendMode_LinearBurn_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_ColorDodge_Kernel
	//
	// One thread handles one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits immediately unless (inWidth > x) && (inHeight > y).
	//
	// Each pixel has four components.  When inDeviceFormat == 0 a pixel is
	// four 16-bit words (8 bytes) converted f16 <-> f32 on load/store;
	// otherwise it is four f32 values (16 bytes).  Pixel index in each
	// buffer is x + pitch * y, so the pitches are used in pixel units.
	//
	// With s0 = pixel from inSrc0, s1 = pixel from inSrc1 (components
	// 0..3; the fourth is used as the coverage term -- presumably alpha):
	//   a0   = inAlphaGain * s0[3]
	//   aOut = a0 + s1[3] - a0 * s1[3]
	//   if (aOut - 8e-6 <= 0)  -> all four outputs are 0
	//   else, for c in 0..2:
	//     dodge  = clamp(clamp(s1[c], 0, 1) / (1 - clamp(s0[c], 0, 0.99999)), 0, 1)
	//     mixed  = s0[c] * (1 - s1[3]) + s1[3] * dodge
	//     out[c] = s1[c] * (1 - a0 / aOut) + (a0 / aOut) * mixed
	//     out[3] = aOut
	// NOTE(review): which input is the blend layer and which is the base is
	// not visible in this file -- confirm against the CUDA source.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_ColorDodge_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<83>;
	.reg .pred %p<5>;
	.loc	22	475	0
$LDWbegin_BlendMode_IR_BlendMode_ColorDodge_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set writes an all-ones mask when true, neg turns it
	// into 1; the AND is non-zero only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads skip straight to exit.
	@%p1 bra 	$Lt_103_28930;
	// %p2 = (inDeviceFormat == 0): selects the 16-bit-per-component path.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0 * y (pixel index into inSrc0, computed in 32 bits
	// and then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrc0];
	@!%p2 bra 	$Lt_103_29698;
	.loc	21	115	0
	// 16-bit path: 8 bytes per pixel, each word converted f16 -> f32
	// into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	475	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_103_29442;
$Lt_103_29698:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_103_29442:
	// Same load sequence for inSrc1; its pixel lands in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inSrc1];
	@!%p2 bra 	$Lt_103_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	475	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_103_29954;
$Lt_103_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_103_29954:
	// %f10 = a0 = inAlphaGain * s0[3];  %f13 = aOut = a0 + s1[3] - a0 * s1[3].
	// %f14 carries the fourth output component (aOut unless zeroed below).
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If aOut - 8e-6 <= 0 the result is forced to zero in all four
	// components, which also avoids the reciprocal of aOut below.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_103_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_103_30466;
$Lt_103_30722:
	// Terms shared by all three components:
	//   %f22 = 1 - s1[3],  %f24 = a0 / aOut (via approximate reciprocal),
	//   %f27 = 1 - a0 / aOut  (%f26 recomputes the same product as %f24).
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0:
	//   %f42 = clamp(clamp(s1[0],0,1) / (1 - clamp(s0[0],0,0.99999)), 0, 1)
	// Capping s0[0] at 0.99999 keeps the divisor (1 - s0[0]) non-zero.
	mov.f32 	%f28, 0f00000000;    	// 0
	max.ftz.f32 	%f29, %f5, %f28;
	mov.f32 	%f30, 0f3f800000;    	// 1
	min.ftz.f32 	%f31, %f29, %f30;
	mov.f32 	%f32, 0f3f800000;    	// 1
	mov.f32 	%f33, 0f00000000;    	// 0
	max.ftz.f32 	%f34, %f1, %f33;
	mov.f32 	%f35, 0f3f7fff58;    	// 0.99999
	min.ftz.f32 	%f36, %f34, %f35;
	sub.ftz.f32 	%f37, %f32, %f36;
	div.approx.ftz.f32 	%f38, %f31, %f37;
	mov.f32 	%f39, 0f00000000;    	// 0
	max.ftz.f32 	%f40, %f38, %f39;
	mov.f32 	%f41, 0f3f800000;    	// 1
	min.ftz.f32 	%f42, %f40, %f41;
	// %f44 = s0[0] * (1 - s1[3]) + s1[3] * dodge
	// %f20 = s1[0] * (1 - a0/aOut) + (a0/aOut) * %f44   (output component 0)
	mul.ftz.f32 	%f43, %f8, %f42;
	fma.rn.ftz.f32 	%f44, %f1, %f22, %f43;
	mul.ftz.f32 	%f45, %f24, %f44;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f45;
	// Component 1: same computation on s0[1] / s1[1]; result in %f19.
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f6, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	mov.f32 	%f50, 0f3f800000;    	// 1
	mov.f32 	%f51, 0f00000000;    	// 0
	max.ftz.f32 	%f52, %f2, %f51;
	mov.f32 	%f53, 0f3f7fff58;    	// 0.99999
	min.ftz.f32 	%f54, %f52, %f53;
	sub.ftz.f32 	%f55, %f50, %f54;
	div.approx.ftz.f32 	%f56, %f49, %f55;
	mov.f32 	%f57, 0f00000000;    	// 0
	max.ftz.f32 	%f58, %f56, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f58, %f59;
	mul.ftz.f32 	%f61, %f8, %f60;
	fma.rn.ftz.f32 	%f62, %f2, %f22, %f61;
	mul.ftz.f32 	%f63, %f24, %f62;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f63;
	// Component 2: same computation on s0[2] / s1[2]; result in %f18.
	mov.f32 	%f64, 0f00000000;    	// 0
	max.ftz.f32 	%f65, %f7, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f65, %f66;
	mov.f32 	%f68, 0f3f800000;    	// 1
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f3, %f69;
	mov.f32 	%f71, 0f3f7fff58;    	// 0.99999
	min.ftz.f32 	%f72, %f70, %f71;
	sub.ftz.f32 	%f73, %f68, %f72;
	div.approx.ftz.f32 	%f74, %f67, %f73;
	mov.f32 	%f75, 0f00000000;    	// 0
	max.ftz.f32 	%f76, %f74, %f75;
	mov.f32 	%f77, 0f3f800000;    	// 1
	min.ftz.f32 	%f78, %f76, %f77;
	mul.ftz.f32 	%f79, %f8, %f78;
	fma.rn.ftz.f32 	%f80, %f3, %f22, %f79;
	mul.ftz.f32 	%f81, %f24, %f80;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f81;
$Lt_103_30466:
	// Store {%f20, %f19, %f18, %f14} at pixel index x + inDestPitch * y in
	// inDest, using the same format switch as the loads.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_ColorDodge_Kernel_inDest];
	@!%p2 bra 	$Lt_103_31234;
	.loc	21	126	0
	// 16-bit path: round each component to f16 (round-to-nearest) and
	// store four 16-bit words.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	475	0
	bra.uni 	$Lt_103_30978;
$Lt_103_31234:
	.loc	21	126	0
	// f32 path: store the four components unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_103_30978:
$Lt_103_28930:
	.loc	22	475	0
	exit;
$LDWend_BlendMode_IR_BlendMode_ColorDodge_Kernel:
	} // BlendMode_IR_BlendMode_ColorDodge_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel
	//
	// One thread handles one pixel at
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y
	// and exits immediately unless (inWidth > x) && (inHeight > y).
	//
	// Each pixel has four components.  When inDeviceFormat == 0 a pixel is
	// four 16-bit words (8 bytes) converted f16 <-> f32 on load/store;
	// otherwise it is four f32 values (16 bytes).  Pixel index in each
	// buffer is x + pitch * y, so the pitches are used in pixel units.
	//
	// With s0 = pixel from inSrc0, s1 = pixel from inSrc1 (components
	// 0..3; the fourth is used as the coverage term -- presumably alpha):
	//   a0   = inAlphaGain * s0[3]
	//   aOut = a0 + s1[3] - a0 * s1[3]
	//   if (aOut - 8e-6 <= 0)  -> all four outputs are 0
	//   else, for c in 0..2:
	//     sum    = clamp(s0[c] + s1[c], 0, 1)
	//     mixed  = s0[c] * (1 - s1[3]) + s1[3] * sum
	//     out[c] = s1[c] * (1 - a0 / aOut) + (a0 / aOut) * mixed
	//     out[3] = aOut
	// NOTE(review): which input is the blend layer and which is the base is
	// not visible in this file -- confirm against the CUDA source.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<53>;
	.reg .pred %p<5>;
	.loc	22	476	0
$LDWbegin_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel:
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set writes an all-ones mask when true, neg turns it
	// into 1; the AND is non-zero only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range threads skip straight to exit.
	@%p1 bra 	$Lt_104_22786;
	// %p2 = (inDeviceFormat == 0): selects the 16-bit-per-component path.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = x + inSrcPitch0 * y (pixel index into inSrc0, computed in 32 bits
	// and then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrc0];
	@!%p2 bra 	$Lt_104_23554;
	.loc	21	115	0
	// 16-bit path: 8 bytes per pixel, each word converted f16 -> f32
	// into %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	476	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_104_23298;
$Lt_104_23554:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_104_23298:
	// Same load sequence for inSrc1; its pixel lands in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inSrc1];
	@!%p2 bra 	$Lt_104_24066;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	476	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_104_23810;
$Lt_104_24066:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_104_23810:
	// %f10 = a0 = inAlphaGain * s0[3];  %f13 = aOut = a0 + s1[3] - a0 * s1[3].
	// %f14 carries the fourth output component (aOut unless zeroed below).
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If aOut - 8e-6 <= 0 the result is forced to zero in all four
	// components, which also avoids the reciprocal of aOut below.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_104_24578;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_104_24322;
$Lt_104_24578:
	// Terms shared by all three components:
	//   %f22 = 1 - s1[3],  %f24 = a0 / aOut (via approximate reciprocal),
	//   %f27 = 1 - a0 / aOut  (%f26 recomputes the same product as %f24).
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: %f32 = clamp(s0[0] + s1[0], 0, 1); the inputs are not
	// clamped individually before the add.
	//   %f34 = s0[0] * (1 - s1[3]) + s1[3] * %f32
	//   %f20 = s1[0] * (1 - a0/aOut) + (a0/aOut) * %f34   (output component 0)
	add.ftz.f32 	%f28, %f1, %f5;
	mov.f32 	%f29, 0f00000000;    	// 0
	max.ftz.f32 	%f30, %f28, %f29;
	mov.f32 	%f31, 0f3f800000;    	// 1
	min.ftz.f32 	%f32, %f30, %f31;
	mul.ftz.f32 	%f33, %f8, %f32;
	fma.rn.ftz.f32 	%f34, %f1, %f22, %f33;
	mul.ftz.f32 	%f35, %f24, %f34;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f35;
	// Component 1: same computation on s0[1] / s1[1]; result in %f19.
	add.ftz.f32 	%f36, %f2, %f6;
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f36, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mul.ftz.f32 	%f41, %f8, %f40;
	fma.rn.ftz.f32 	%f42, %f2, %f22, %f41;
	mul.ftz.f32 	%f43, %f24, %f42;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f43;
	// Component 2: same computation on s0[2] / s1[2]; result in %f18.
	add.ftz.f32 	%f44, %f3, %f7;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f44, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f46, %f47;
	mul.ftz.f32 	%f49, %f8, %f48;
	fma.rn.ftz.f32 	%f50, %f3, %f22, %f49;
	mul.ftz.f32 	%f51, %f24, %f50;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f51;
$Lt_104_24322:
	// Store {%f20, %f19, %f18, %f14} at pixel index x + inDestPitch * y in
	// inDest, using the same format switch as the loads.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel_inDest];
	@!%p2 bra 	$Lt_104_25090;
	.loc	21	126	0
	// 16-bit path: round each component to f16 (round-to-nearest) and
	// store four 16-bit words.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	476	0
	bra.uni 	$Lt_104_24834;
$Lt_104_25090:
	.loc	21	126	0
	// f32 path: store the four components unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_104_24834:
$Lt_104_22786:
	.loc	22	476	0
	exit;
$LDWend_BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel:
	} // BlendMode_IR_BlendMode_LinearDodgeAdd_Kernel

	.entry BlendMode_IR_BlendMode_Overlay_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<104>;
	.reg .pred %p<8>;
	.loc	22	477	0
$LDWbegin_BlendMode_IR_BlendMode_Overlay_Kernel:
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_105_31234;
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrc0];
	@!%p2 bra 	$Lt_105_32002;
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	477	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_105_31746;
$Lt_105_32002:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_105_31746:
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inSrc1];
	@!%p2 bra 	$Lt_105_32514;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	477	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_105_32258;
$Lt_105_32514:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_105_32258:
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_105_33026;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_105_32770;
$Lt_105_33026:
	.loc	22	373	0
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f5, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f1, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	min.ftz.f32 	%f26, %f22, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f24, %f27;
	mov.f32 	%f29, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f26, %f29;
	@!%p4 bra 	$Lt_105_33538;
	add.ftz.f32 	%f30, %f28, %f28;
	mul.ftz.f32 	%f31, %f26, %f30;
	bra.uni 	$Lt_105_33282;
$Lt_105_33538:
	mov.f32 	%f32, 0f3f800000;    	// 1
	sub.ftz.f32 	%f33, %f32, %f28;
	mov.f32 	%f34, 0f3f800000;    	// 1
	add.ftz.f32 	%f35, %f33, %f33;
	mov.f32 	%f36, 0f3f800000;    	// 1
	sub.ftz.f32 	%f37, %f36, %f26;
	mul.ftz.f32 	%f38, %f35, %f37;
	sub.ftz.f32 	%f31, %f34, %f38;
$Lt_105_33282:
	.loc	22	477	0
	mov.f32 	%f39, 0f3f800000;    	// 1
	sub.ftz.f32 	%f40, %f39, %f8;
	rcp.approx.ftz.f32 	%f41, %f13;
	mul.ftz.f32 	%f42, %f10, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	mul.ftz.f32 	%f44, %f10, %f41;
	sub.ftz.f32 	%f45, %f43, %f44;
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f31, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	mul.ftz.f32 	%f50, %f8, %f49;
	fma.rn.ftz.f32 	%f51, %f1, %f40, %f50;
	mul.ftz.f32 	%f52, %f42, %f51;
	fma.rn.ftz.f32 	%f20, %f5, %f45, %f52;
	.loc	22	373	0
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f6, %f53;
	mov.f32 	%f55, 0f00000000;    	// 0
	max.ftz.f32 	%f56, %f2, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	min.ftz.f32 	%f58, %f54, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f56, %f59;
	mov.f32 	%f61, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p5, %f58, %f61;
	@!%p5 bra 	$Lt_105_34050;
	add.ftz.f32 	%f62, %f60, %f60;
	mul.ftz.f32 	%f63, %f58, %f62;
	bra.uni 	$Lt_105_33794;
$Lt_105_34050:
	mov.f32 	%f64, 0f3f800000;    	// 1
	sub.ftz.f32 	%f65, %f64, %f60;
	mov.f32 	%f66, 0f3f800000;    	// 1
	add.ftz.f32 	%f67, %f65, %f65;
	mov.f32 	%f68, 0f3f800000;    	// 1
	sub.ftz.f32 	%f69, %f68, %f58;
	mul.ftz.f32 	%f70, %f67, %f69;
	sub.ftz.f32 	%f63, %f66, %f70;
$Lt_105_33794:
	.loc	22	477	0
	mov.f32 	%f71, 0f00000000;    	// 0
	max.ftz.f32 	%f72, %f63, %f71;
	mov.f32 	%f73, 0f3f800000;    	// 1
	min.ftz.f32 	%f74, %f72, %f73;
	mul.ftz.f32 	%f75, %f8, %f74;
	fma.rn.ftz.f32 	%f76, %f2, %f40, %f75;
	mul.ftz.f32 	%f77, %f42, %f76;
	fma.rn.ftz.f32 	%f19, %f6, %f45, %f77;
	.loc	22	373	0
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f7, %f78;
	mov.f32 	%f80, 0f00000000;    	// 0
	max.ftz.f32 	%f81, %f3, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f79, %f82;
	mov.f32 	%f84, 0f3f800000;    	// 1
	min.ftz.f32 	%f85, %f81, %f84;
	mov.f32 	%f86, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p6, %f83, %f86;
	@!%p6 bra 	$Lt_105_34562;
	add.ftz.f32 	%f87, %f85, %f85;
	mul.ftz.f32 	%f88, %f83, %f87;
	bra.uni 	$Lt_105_34306;
$Lt_105_34562:
	mov.f32 	%f89, 0f3f800000;    	// 1
	sub.ftz.f32 	%f90, %f89, %f85;
	mov.f32 	%f91, 0f3f800000;    	// 1
	add.ftz.f32 	%f92, %f90, %f90;
	mov.f32 	%f93, 0f3f800000;    	// 1
	sub.ftz.f32 	%f94, %f93, %f83;
	mul.ftz.f32 	%f95, %f92, %f94;
	sub.ftz.f32 	%f88, %f91, %f95;
$Lt_105_34306:
	.loc	22	477	0
	mov.f32 	%f96, 0f00000000;    	// 0
	max.ftz.f32 	%f97, %f88, %f96;
	mov.f32 	%f98, 0f3f800000;    	// 1
	min.ftz.f32 	%f99, %f97, %f98;
	mul.ftz.f32 	%f100, %f8, %f99;
	fma.rn.ftz.f32 	%f101, %f3, %f40, %f100;
	mul.ftz.f32 	%f102, %f42, %f101;
	fma.rn.ftz.f32 	%f18, %f7, %f45, %f102;
$Lt_105_32770:
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Overlay_Kernel_inDest];
	@!%p2 bra 	$Lt_105_35074;
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	477	0
	bra.uni 	$Lt_105_34818;
$Lt_105_35074:
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_105_34818:
$Lt_105_31234:
	.loc	22	477	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Overlay_Kernel:
	} // BlendMode_IR_BlendMode_Overlay_Kernel

	//---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_SoftLight_Kernel
	//
	// Each thread handles one pixel at (x, y), where
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y.
	// It reads one 4-component pixel from inSrc0 and one from inSrc1,
	// blends components 0..2 with the soft-light formula below, derives a
	// combined 4th component, and writes the 4-component result to inDest.
	//
	// Parameters:
	//   inSrc0, inSrc1, inDest : global-memory base addresses.
	//   inSrcPitch0/1, inDestPitch : row pitch in pixels (the element index
	//       x + pitch * y is scaled by the pixel size afterwards).
	//   inDeviceFormat : 0 selects 4 x f16 pixels (8 bytes each); any other
	//       value selects 4 x f32 pixels (16 bytes each).
	//   inWidth, inHeight : threads with x >= inWidth or y >= inHeight
	//       (signed compare) exit without touching memory.
	//   inAlphaGain : scale applied to the 4th component of the inSrc0 pixel
	//       (presumably alpha, judging by the parameter name).
	//
	// Per colour component c, with a = clamp(src0.c, 0, 1) and
	// b = clamp(src1.c, 0, 1):
	//   blend = (2a - 1) * (b - b*b)     + b   when a <= 0.5
	//   blend = (2a - 1) * (sqrt(b) - b) + b   otherwise
	// With g = inAlphaGain * src0.c3, A = g + src1.c3 - g * src1.c3, w = g / A:
	//   out.c  = src1.c * (1 - w)
	//            + w * (src0.c * (1 - src1.c3) + src1.c3 * clamp(blend, 0, 1))
	//   out.c3 = A
	// When A - 8e-6 <= 0 all four output components are written as 0.
	//---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_SoftLight_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<101>;
	.reg .pred %p<8>;
	.loc	22	478	0
$LDWbegin_BlendMode_IR_BlendMode_SoftLight_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.* yields all-ones/zero, neg turns that into 1/0;
	// %r17 != 0 only when (inWidth > x) and (inHeight > y).
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: jump straight to exit.
	@%p1 bra 	$Lt_106_31234;
	// %p2 = (inDeviceFormat == 0); it selects the f16 path for both loads
	// and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = pixel index into inSrc0 = x + inSrcPitch0 * y (sign-extended to 64 bit)
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrc0];
	@!%p2 bra 	$Lt_106_32002;
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each
	// one to f32 -> %f1..%f4 (src0 components 0..3).
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	478	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_106_31746;
$Lt_106_32002:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_106_31746:
	// Same addressing for inSrc1: %rd7 = x + inSrcPitch1 * y;
	// the pixel ends up in %f5..%f8 (src1 components 0..3).
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inSrc1];
	@!%p2 bra 	$Lt_106_32514;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	478	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_106_32258;
$Lt_106_32514:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_106_32258:
	// %f10 = g = inAlphaGain * src0.c3
	// %f13 = A = g + src1.c3 - g * src1.c3 ; %f14 holds the value stored as out.c3
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If A - 8e-6 <= 0, skip the blend and output (0, 0, 0, 0); this also
	// keeps the reciprocal of A below away from a zero/negative input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_106_33026;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_106_32770;
$Lt_106_33026:
	// ---- component 0 ----
	// %f26 = a = clamp(src0.c0, 0, 1) ; %f28 = b = clamp(src1.c0, 0, 1)
	// %f31 = 2a - 1
	.loc	22	380	0
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f5, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	min.ftz.f32 	%f26, %f22, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f24, %f27;
	add.ftz.f32 	%f29, %f26, %f26;
	mov.f32 	%f30, 0fbf800000;    	// -1
	add.ftz.f32 	%f31, %f29, %f30;
	mov.f32 	%f32, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f26, %f32;
	@!%p4 bra 	$Lt_106_33538;
	// a <= 0.5: %f35 = (2a - 1) * (b - b*b) + b
	mul.ftz.f32 	%f33, %f28, %f28;
	sub.ftz.f32 	%f34, %f28, %f33;
	fma.rn.ftz.f32 	%f35, %f31, %f34, %f28;
	bra.uni 	$Lt_106_33282;
$Lt_106_33538:
	// otherwise: %f35 = (2a - 1) * (sqrt(b) - b) + b, using the approximate sqrt
	sqrt.approx.ftz.f32 	%f36, %f28;
	sub.ftz.f32 	%f37, %f36, %f28;
	fma.rn.ftz.f32 	%f35, %f31, %f37, %f28;
$Lt_106_33282:
	// Terms shared by all three components:
	//   %f39 = 1 - src1.c3
	//   %f41 = w = g * (1 / A)   (approximate reciprocal)
	//   %f44 = 1 - w             (%f43 recomputes the same product as %f41)
	// out.c0 (%f20) = src1.c0 * (1 - w)
	//                 + w * (src0.c0 * (1 - src1.c3) + src1.c3 * clamp(%f35, 0, 1))
	.loc	22	478	0
	mov.f32 	%f38, 0f3f800000;    	// 1
	sub.ftz.f32 	%f39, %f38, %f8;
	rcp.approx.ftz.f32 	%f40, %f13;
	mul.ftz.f32 	%f41, %f10, %f40;
	mov.f32 	%f42, 0f3f800000;    	// 1
	mul.ftz.f32 	%f43, %f10, %f40;
	sub.ftz.f32 	%f44, %f42, %f43;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f35, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f46, %f47;
	mul.ftz.f32 	%f49, %f8, %f48;
	fma.rn.ftz.f32 	%f50, %f1, %f39, %f49;
	mul.ftz.f32 	%f51, %f41, %f50;
	fma.rn.ftz.f32 	%f20, %f5, %f44, %f51;
	// ---- component 1 ---- (same formula on src0.c1 = %f2, src1.c1 = %f6)
	// %f57 = a, %f59 = b, %f62 = 2a - 1, %f66 = blend
	.loc	22	380	0
	mov.f32 	%f52, 0f00000000;    	// 0
	max.ftz.f32 	%f53, %f2, %f52;
	mov.f32 	%f54, 0f00000000;    	// 0
	max.ftz.f32 	%f55, %f6, %f54;
	mov.f32 	%f56, 0f3f800000;    	// 1
	min.ftz.f32 	%f57, %f53, %f56;
	mov.f32 	%f58, 0f3f800000;    	// 1
	min.ftz.f32 	%f59, %f55, %f58;
	add.ftz.f32 	%f60, %f57, %f57;
	mov.f32 	%f61, 0fbf800000;    	// -1
	add.ftz.f32 	%f62, %f60, %f61;
	mov.f32 	%f63, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p5, %f57, %f63;
	@!%p5 bra 	$Lt_106_34050;
	mul.ftz.f32 	%f64, %f59, %f59;
	sub.ftz.f32 	%f65, %f59, %f64;
	fma.rn.ftz.f32 	%f66, %f62, %f65, %f59;
	bra.uni 	$Lt_106_33794;
$Lt_106_34050:
	sqrt.approx.ftz.f32 	%f67, %f59;
	sub.ftz.f32 	%f68, %f67, %f59;
	fma.rn.ftz.f32 	%f66, %f62, %f68, %f59;
$Lt_106_33794:
	// out.c1 (%f19): clamp blend to [0, 1], then composite with %f39/%f41/%f44.
	.loc	22	478	0
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f66, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f70, %f71;
	mul.ftz.f32 	%f73, %f8, %f72;
	fma.rn.ftz.f32 	%f74, %f2, %f39, %f73;
	mul.ftz.f32 	%f75, %f41, %f74;
	fma.rn.ftz.f32 	%f19, %f6, %f44, %f75;
	// ---- component 2 ---- (same formula on src0.c2 = %f3, src1.c2 = %f7)
	// %f81 = a, %f83 = b, %f86 = 2a - 1, %f90 = blend
	.loc	22	380	0
	mov.f32 	%f76, 0f00000000;    	// 0
	max.ftz.f32 	%f77, %f3, %f76;
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f7, %f78;
	mov.f32 	%f80, 0f3f800000;    	// 1
	min.ftz.f32 	%f81, %f77, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f79, %f82;
	add.ftz.f32 	%f84, %f81, %f81;
	mov.f32 	%f85, 0fbf800000;    	// -1
	add.ftz.f32 	%f86, %f84, %f85;
	mov.f32 	%f87, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p6, %f81, %f87;
	@!%p6 bra 	$Lt_106_34562;
	mul.ftz.f32 	%f88, %f83, %f83;
	sub.ftz.f32 	%f89, %f83, %f88;
	fma.rn.ftz.f32 	%f90, %f86, %f89, %f83;
	bra.uni 	$Lt_106_34306;
$Lt_106_34562:
	sqrt.approx.ftz.f32 	%f91, %f83;
	sub.ftz.f32 	%f92, %f91, %f83;
	fma.rn.ftz.f32 	%f90, %f86, %f92, %f83;
$Lt_106_34306:
	// out.c2 (%f18): clamp blend to [0, 1], then composite with %f39/%f41/%f44.
	.loc	22	478	0
	mov.f32 	%f93, 0f00000000;    	// 0
	max.ftz.f32 	%f94, %f90, %f93;
	mov.f32 	%f95, 0f3f800000;    	// 1
	min.ftz.f32 	%f96, %f94, %f95;
	mul.ftz.f32 	%f97, %f8, %f96;
	fma.rn.ftz.f32 	%f98, %f3, %f39, %f97;
	mul.ftz.f32 	%f99, %f41, %f98;
	fma.rn.ftz.f32 	%f18, %f7, %f44, %f99;
$Lt_106_32770:
	// Store the result pixel (%f20, %f19, %f18, %f14) at index
	// %rd13 = x + inDestPitch * y in inDest, in the format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_SoftLight_Kernel_inDest];
	@!%p2 bra 	$Lt_106_35074;
	// f16 path: narrow each component with round-to-nearest, store 4 x 16 bit.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	478	0
	bra.uni 	$Lt_106_34818;
$Lt_106_35074:
	// f32 path: store the four floats unchanged (16 bytes per pixel).
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_106_34818:
$Lt_106_31234:
	// Common exit for in-range and out-of-range threads.
	.loc	22	478	0
	exit;
$LDWend_BlendMode_IR_BlendMode_SoftLight_Kernel:
	} // BlendMode_IR_BlendMode_SoftLight_Kernel

	//---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_HardLight_Kernel
	//
	// Each thread handles one pixel at (x, y), where
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y.
	// It reads one 4-component pixel from inSrc0 and one from inSrc1,
	// blends components 0..2 with the hard-light formula below, derives a
	// combined 4th component, and writes the 4-component result to inDest.
	//
	// Parameters:
	//   inSrc0, inSrc1, inDest : global-memory base addresses.
	//   inSrcPitch0/1, inDestPitch : row pitch in pixels (the element index
	//       x + pitch * y is scaled by the pixel size afterwards).
	//   inDeviceFormat : 0 selects 4 x f16 pixels (8 bytes each); any other
	//       value selects 4 x f32 pixels (16 bytes each).
	//   inWidth, inHeight : threads with x >= inWidth or y >= inHeight
	//       (signed compare) exit without touching memory.
	//   inAlphaGain : scale applied to the 4th component of the inSrc0 pixel
	//       (presumably alpha, judging by the parameter name).
	//
	// Per colour component c, with a = clamp(src0.c, 0, 1) and
	// b = clamp(src1.c, 0, 1):
	//   blend = 2a * b                     when a <= 0.5
	//   blend = 1 - 2 * (1 - a) * (1 - b)  otherwise
	// With g = inAlphaGain * src0.c3, A = g + src1.c3 - g * src1.c3, w = g / A:
	//   out.c  = src1.c * (1 - w)
	//            + w * (src0.c * (1 - src1.c3) + src1.c3 * clamp(blend, 0, 1))
	//   out.c3 = A
	// When A - 8e-6 <= 0 all four output components are written as 0.
	//---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_HardLight_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<104>;
	.reg .pred %p<8>;
	.loc	22	479	0
$LDWbegin_BlendMode_IR_BlendMode_HardLight_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.* yields all-ones/zero, neg turns that into 1/0;
	// %r17 != 0 only when (inWidth > x) and (inHeight > y).
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: jump straight to exit.
	@%p1 bra 	$Lt_107_31234;
	// %p2 = (inDeviceFormat == 0); it selects the f16 path for both loads
	// and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = pixel index into inSrc0 = x + inSrcPitch0 * y (sign-extended to 64 bit)
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrc0];
	@!%p2 bra 	$Lt_107_32002;
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each
	// one to f32 -> %f1..%f4 (src0 components 0..3).
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	479	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_107_31746;
$Lt_107_32002:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_107_31746:
	// Same addressing for inSrc1: %rd7 = x + inSrcPitch1 * y;
	// the pixel ends up in %f5..%f8 (src1 components 0..3).
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inSrc1];
	@!%p2 bra 	$Lt_107_32514;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	479	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_107_32258;
$Lt_107_32514:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_107_32258:
	// %f10 = g = inAlphaGain * src0.c3
	// %f13 = A = g + src1.c3 - g * src1.c3 ; %f14 holds the value stored as out.c3
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If A - 8e-6 <= 0, skip the blend and output (0, 0, 0, 0); this also
	// keeps the reciprocal of A below away from a zero/negative input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_107_33026;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_107_32770;
$Lt_107_33026:
	// ---- component 0 ----
	// %f26 = a = clamp(src0.c0, 0, 1) ; %f28 = b = clamp(src1.c0, 0, 1)
	.loc	22	386	0
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f5, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	min.ftz.f32 	%f26, %f22, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f24, %f27;
	mov.f32 	%f29, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f26, %f29;
	@!%p4 bra 	$Lt_107_33538;
	// a <= 0.5: %f31 = b * 2a
	add.ftz.f32 	%f30, %f26, %f26;
	mul.ftz.f32 	%f31, %f28, %f30;
	bra.uni 	$Lt_107_33282;
$Lt_107_33538:
	// otherwise: %f31 = 1 - 2 * (1 - a) * (1 - b)
	mov.f32 	%f32, 0f3f800000;    	// 1
	sub.ftz.f32 	%f33, %f32, %f26;
	mov.f32 	%f34, 0f3f800000;    	// 1
	add.ftz.f32 	%f35, %f33, %f33;
	mov.f32 	%f36, 0f3f800000;    	// 1
	sub.ftz.f32 	%f37, %f36, %f28;
	mul.ftz.f32 	%f38, %f35, %f37;
	sub.ftz.f32 	%f31, %f34, %f38;
$Lt_107_33282:
	// Terms shared by all three components:
	//   %f40 = 1 - src1.c3
	//   %f42 = w = g * (1 / A)   (approximate reciprocal)
	//   %f45 = 1 - w             (%f44 recomputes the same product as %f42)
	// out.c0 (%f20) = src1.c0 * (1 - w)
	//                 + w * (src0.c0 * (1 - src1.c3) + src1.c3 * clamp(%f31, 0, 1))
	.loc	22	479	0
	mov.f32 	%f39, 0f3f800000;    	// 1
	sub.ftz.f32 	%f40, %f39, %f8;
	rcp.approx.ftz.f32 	%f41, %f13;
	mul.ftz.f32 	%f42, %f10, %f41;
	mov.f32 	%f43, 0f3f800000;    	// 1
	mul.ftz.f32 	%f44, %f10, %f41;
	sub.ftz.f32 	%f45, %f43, %f44;
	mov.f32 	%f46, 0f00000000;    	// 0
	max.ftz.f32 	%f47, %f31, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	min.ftz.f32 	%f49, %f47, %f48;
	mul.ftz.f32 	%f50, %f8, %f49;
	fma.rn.ftz.f32 	%f51, %f1, %f40, %f50;
	mul.ftz.f32 	%f52, %f42, %f51;
	fma.rn.ftz.f32 	%f20, %f5, %f45, %f52;
	// ---- component 1 ---- (same formula on src0.c1 = %f2, src1.c1 = %f6)
	// %f58 = a, %f60 = b, %f63 = blend
	.loc	22	386	0
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f2, %f53;
	mov.f32 	%f55, 0f00000000;    	// 0
	max.ftz.f32 	%f56, %f6, %f55;
	mov.f32 	%f57, 0f3f800000;    	// 1
	min.ftz.f32 	%f58, %f54, %f57;
	mov.f32 	%f59, 0f3f800000;    	// 1
	min.ftz.f32 	%f60, %f56, %f59;
	mov.f32 	%f61, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p5, %f58, %f61;
	@!%p5 bra 	$Lt_107_34050;
	add.ftz.f32 	%f62, %f58, %f58;
	mul.ftz.f32 	%f63, %f60, %f62;
	bra.uni 	$Lt_107_33794;
$Lt_107_34050:
	mov.f32 	%f64, 0f3f800000;    	// 1
	sub.ftz.f32 	%f65, %f64, %f58;
	mov.f32 	%f66, 0f3f800000;    	// 1
	add.ftz.f32 	%f67, %f65, %f65;
	mov.f32 	%f68, 0f3f800000;    	// 1
	sub.ftz.f32 	%f69, %f68, %f60;
	mul.ftz.f32 	%f70, %f67, %f69;
	sub.ftz.f32 	%f63, %f66, %f70;
$Lt_107_33794:
	// out.c1 (%f19): clamp blend to [0, 1], then composite with %f40/%f42/%f45.
	.loc	22	479	0
	mov.f32 	%f71, 0f00000000;    	// 0
	max.ftz.f32 	%f72, %f63, %f71;
	mov.f32 	%f73, 0f3f800000;    	// 1
	min.ftz.f32 	%f74, %f72, %f73;
	mul.ftz.f32 	%f75, %f8, %f74;
	fma.rn.ftz.f32 	%f76, %f2, %f40, %f75;
	mul.ftz.f32 	%f77, %f42, %f76;
	fma.rn.ftz.f32 	%f19, %f6, %f45, %f77;
	// ---- component 2 ---- (same formula on src0.c2 = %f3, src1.c2 = %f7)
	// %f83 = a, %f85 = b, %f88 = blend
	.loc	22	386	0
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f3, %f78;
	mov.f32 	%f80, 0f00000000;    	// 0
	max.ftz.f32 	%f81, %f7, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f83, %f79, %f82;
	mov.f32 	%f84, 0f3f800000;    	// 1
	min.ftz.f32 	%f85, %f81, %f84;
	mov.f32 	%f86, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p6, %f83, %f86;
	@!%p6 bra 	$Lt_107_34562;
	add.ftz.f32 	%f87, %f83, %f83;
	mul.ftz.f32 	%f88, %f85, %f87;
	bra.uni 	$Lt_107_34306;
$Lt_107_34562:
	mov.f32 	%f89, 0f3f800000;    	// 1
	sub.ftz.f32 	%f90, %f89, %f83;
	mov.f32 	%f91, 0f3f800000;    	// 1
	add.ftz.f32 	%f92, %f90, %f90;
	mov.f32 	%f93, 0f3f800000;    	// 1
	sub.ftz.f32 	%f94, %f93, %f85;
	mul.ftz.f32 	%f95, %f92, %f94;
	sub.ftz.f32 	%f88, %f91, %f95;
$Lt_107_34306:
	// out.c2 (%f18): clamp blend to [0, 1], then composite with %f40/%f42/%f45.
	.loc	22	479	0
	mov.f32 	%f96, 0f00000000;    	// 0
	max.ftz.f32 	%f97, %f88, %f96;
	mov.f32 	%f98, 0f3f800000;    	// 1
	min.ftz.f32 	%f99, %f97, %f98;
	mul.ftz.f32 	%f100, %f8, %f99;
	fma.rn.ftz.f32 	%f101, %f3, %f40, %f100;
	mul.ftz.f32 	%f102, %f42, %f101;
	fma.rn.ftz.f32 	%f18, %f7, %f45, %f102;
$Lt_107_32770:
	// Store the result pixel (%f20, %f19, %f18, %f14) at index
	// %rd13 = x + inDestPitch * y in inDest, in the format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_HardLight_Kernel_inDest];
	@!%p2 bra 	$Lt_107_35074;
	// f16 path: narrow each component with round-to-nearest, store 4 x 16 bit.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	479	0
	bra.uni 	$Lt_107_34818;
$Lt_107_35074:
	// f32 path: store the four floats unchanged (16 bytes per pixel).
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_107_34818:
$Lt_107_31234:
	// Common exit for in-range and out-of-range threads.
	.loc	22	479	0
	exit;
$LDWend_BlendMode_IR_BlendMode_HardLight_Kernel:
	} // BlendMode_IR_BlendMode_HardLight_Kernel

	//---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_VividLight_Kernel
	//
	// Each thread handles one pixel at (x, y), where
	//   x = ctaid.x * ntid.x + tid.x,  y = ctaid.y * ntid.y + tid.y.
	// It reads one 4-component pixel from inSrc0 and one from inSrc1,
	// blends components 0..2 with the vivid-light formula below, derives a
	// combined 4th component, and writes the 4-component result to inDest.
	//
	// Parameters:
	//   inSrc0, inSrc1, inDest : global-memory base addresses.
	//   inSrcPitch0/1, inDestPitch : row pitch in pixels (the element index
	//       x + pitch * y is scaled by the pixel size afterwards).
	//   inDeviceFormat : 0 selects 4 x f16 pixels (8 bytes each); any other
	//       value selects 4 x f32 pixels (16 bytes each).
	//   inWidth, inHeight : threads with x >= inWidth or y >= inHeight
	//       (signed compare) exit without touching memory.
	//   inAlphaGain : scale applied to the 4th component of the inSrc0 pixel
	//       (presumably alpha, judging by the parameter name).
	//
	// Per colour component c, with a = clamp(src0.c, 1e-6, 0.999999) and
	// b = clamp(src1.c, 0, 1):
	//   blend = clamp(1 - (1 - b) / (2a), 0, 1)   when a <= 0.5
	//   blend = clamp(b / (2 * (1 - a)), 0, 1)    otherwise
	// With g = inAlphaGain * src0.c3, A = g + src1.c3 - g * src1.c3, w = g / A:
	//   out.c  = src1.c * (1 - w)
	//            + w * (src0.c * (1 - src1.c3) + src1.c3 * blend)
	//   out.c3 = A
	// When A - 8e-6 <= 0 all four output components are written as 0.
	//---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_VividLight_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<116>;
	.reg .pred %p<8>;
	.loc	22	480	0
$LDWbegin_BlendMode_IR_BlendMode_VividLight_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x ; %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: set.* yields all-ones/zero, neg turns that into 1/0;
	// %r17 != 0 only when (inWidth > x) and (inHeight > y).
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: jump straight to exit.
	@%p1 bra 	$Lt_108_36610;
	// %p2 = (inDeviceFormat == 0); it selects the f16 path for both loads
	// and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = pixel index into inSrc0 = x + inSrcPitch0 * y (sign-extended to 64 bit)
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrc0];
	@!%p2 bra 	$Lt_108_37378;
	// f16 path: 8 bytes per pixel; load four 16-bit values and widen each
	// one to f32 -> %f1..%f4 (src0 components 0..3).
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	480	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_108_37122;
$Lt_108_37378:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_108_37122:
	// Same addressing for inSrc1: %rd7 = x + inSrcPitch1 * y;
	// the pixel ends up in %f5..%f8 (src1 components 0..3).
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inSrc1];
	@!%p2 bra 	$Lt_108_37890;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	480	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_108_37634;
$Lt_108_37890:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_108_37634:
	// %f10 = g = inAlphaGain * src0.c3
	// %f13 = A = g + src1.c3 - g * src1.c3 ; %f14 holds the value stored as out.c3
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If A - 8e-6 <= 0, skip the blend and output (0, 0, 0, 0); this also
	// keeps the reciprocal of A below away from a zero/negative input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_108_38402;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_108_38146;
$Lt_108_38402:
	// ---- component 0 ----
	// %f26 = a = clamp(src0.c0, 1e-6, 0.999999): the narrowed range keeps the
	// divisors 2a and 2 * (1 - a) used below away from zero.
	// %f28 = b = clamp(src1.c0, 0, 1)
	.loc	22	431	0
	mov.f32 	%f21, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f5, %f23;
	mov.f32 	%f25, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f26, %f22, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f24, %f27;
	mov.f32 	%f29, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p4, %f26, %f29;
	@!%p4 bra 	$Lt_108_34818;
	// a <= 0.5: %f39 = clamp(1 - (1 - b) / (2a), 0, 1), approximate division
	.loc	22	433	0
	mov.f32 	%f30, 0f3f800000;    	// 1
	mov.f32 	%f31, 0f3f800000;    	// 1
	sub.ftz.f32 	%f32, %f31, %f28;
	add.ftz.f32 	%f33, %f26, %f26;
	div.approx.ftz.f32 	%f34, %f32, %f33;
	sub.ftz.f32 	%f35, %f30, %f34;
	mov.f32 	%f36, 0f00000000;    	// 0
	max.ftz.f32 	%f37, %f35, %f36;
	mov.f32 	%f38, 0f3f800000;    	// 1
	min.ftz.f32 	%f39, %f37, %f38;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__285_5;
$Lt_108_34818:
	// otherwise: %f39 = clamp(b / (2 * (1 - a)), 0, 1), approximate division
	.loc	22	437	0
	mov.f32 	%f40, 0f3f800000;    	// 1
	sub.ftz.f32 	%f41, %f40, %f26;
	add.ftz.f32 	%f42, %f41, %f41;
	div.approx.ftz.f32 	%f43, %f28, %f42;
	mov.f32 	%f44, 0f00000000;    	// 0
	max.ftz.f32 	%f45, %f43, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	min.ftz.f32 	%f39, %f45, %f46;
$LDWendi__Z5ClampIfET_S0_S0_S0__285_5:
	// Terms shared by all three components:
	//   %f48 = 1 - src1.c3
	//   %f50 = w = g * (1 / A)   (approximate reciprocal)
	//   %f53 = 1 - w             (%f52 recomputes the same product as %f50)
	// out.c0 (%f20) = src1.c0 * (1 - w)
	//                 + w * (src0.c0 * (1 - src1.c3) + src1.c3 * %f39)
	.loc	22	480	0
	mov.f32 	%f47, 0f3f800000;    	// 1
	sub.ftz.f32 	%f48, %f47, %f8;
	rcp.approx.ftz.f32 	%f49, %f13;
	mul.ftz.f32 	%f50, %f10, %f49;
	mov.f32 	%f51, 0f3f800000;    	// 1
	mul.ftz.f32 	%f52, %f10, %f49;
	sub.ftz.f32 	%f53, %f51, %f52;
	mul.ftz.f32 	%f54, %f39, %f8;
	fma.rn.ftz.f32 	%f55, %f1, %f48, %f54;
	mul.ftz.f32 	%f56, %f50, %f55;
	fma.rn.ftz.f32 	%f20, %f5, %f53, %f56;
	// ---- component 1 ---- (same formula on src0.c1 = %f2, src1.c1 = %f6)
	// %f62 = a, %f64 = b, %f75 = blend
	.loc	22	431	0
	mov.f32 	%f57, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f58, %f2, %f57;
	mov.f32 	%f59, 0f00000000;    	// 0
	max.ftz.f32 	%f60, %f6, %f59;
	mov.f32 	%f61, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f62, %f58, %f61;
	mov.f32 	%f63, 0f3f800000;    	// 1
	min.ftz.f32 	%f64, %f60, %f63;
	mov.f32 	%f65, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p5, %f62, %f65;
	@!%p5 bra 	$Lt_108_35074;
	.loc	22	433	0
	mov.f32 	%f66, 0f3f800000;    	// 1
	mov.f32 	%f67, 0f3f800000;    	// 1
	sub.ftz.f32 	%f68, %f67, %f64;
	add.ftz.f32 	%f69, %f62, %f62;
	div.approx.ftz.f32 	%f70, %f68, %f69;
	sub.ftz.f32 	%f71, %f66, %f70;
	mov.f32 	%f72, 0f00000000;    	// 0
	max.ftz.f32 	%f73, %f71, %f72;
	mov.f32 	%f74, 0f3f800000;    	// 1
	min.ftz.f32 	%f75, %f73, %f74;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__285_3;
$Lt_108_35074:
	.loc	22	437	0
	mov.f32 	%f76, 0f3f800000;    	// 1
	sub.ftz.f32 	%f77, %f76, %f62;
	add.ftz.f32 	%f78, %f77, %f77;
	div.approx.ftz.f32 	%f79, %f64, %f78;
	mov.f32 	%f80, 0f00000000;    	// 0
	max.ftz.f32 	%f81, %f79, %f80;
	mov.f32 	%f82, 0f3f800000;    	// 1
	min.ftz.f32 	%f75, %f81, %f82;
$LDWendi__Z5ClampIfET_S0_S0_S0__285_3:
	// out.c1 (%f19): composite the blend with %f48/%f50/%f53.
	.loc	22	480	0
	mul.ftz.f32 	%f83, %f75, %f8;
	fma.rn.ftz.f32 	%f84, %f2, %f48, %f83;
	mul.ftz.f32 	%f85, %f50, %f84;
	fma.rn.ftz.f32 	%f19, %f6, %f53, %f85;
	// ---- component 2 ---- (same formula on src0.c2 = %f3, src1.c2 = %f7)
	// %f91 = a, %f93 = b, %f104 = blend
	.loc	22	431	0
	mov.f32 	%f86, 0f358637bd;    	// 1e-006
	max.ftz.f32 	%f87, %f3, %f86;
	mov.f32 	%f88, 0f00000000;    	// 0
	max.ftz.f32 	%f89, %f7, %f88;
	mov.f32 	%f90, 0f3f7fffef;    	// 0.999999
	min.ftz.f32 	%f91, %f87, %f90;
	mov.f32 	%f92, 0f3f800000;    	// 1
	min.ftz.f32 	%f93, %f89, %f92;
	mov.f32 	%f94, 0f3f000000;    	// 0.5
	setp.le.ftz.f32 	%p6, %f91, %f94;
	@!%p6 bra 	$Lt_108_35330;
	.loc	22	433	0
	mov.f32 	%f95, 0f3f800000;    	// 1
	mov.f32 	%f96, 0f3f800000;    	// 1
	sub.ftz.f32 	%f97, %f96, %f93;
	add.ftz.f32 	%f98, %f91, %f91;
	div.approx.ftz.f32 	%f99, %f97, %f98;
	sub.ftz.f32 	%f100, %f95, %f99;
	mov.f32 	%f101, 0f00000000;   	// 0
	max.ftz.f32 	%f102, %f100, %f101;
	mov.f32 	%f103, 0f3f800000;   	// 1
	min.ftz.f32 	%f104, %f102, %f103;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__285_1;
$Lt_108_35330:
	.loc	22	437	0
	mov.f32 	%f105, 0f3f800000;   	// 1
	sub.ftz.f32 	%f106, %f105, %f91;
	add.ftz.f32 	%f107, %f106, %f106;
	div.approx.ftz.f32 	%f108, %f93, %f107;
	mov.f32 	%f109, 0f00000000;   	// 0
	max.ftz.f32 	%f110, %f108, %f109;
	mov.f32 	%f111, 0f3f800000;   	// 1
	min.ftz.f32 	%f104, %f110, %f111;
$LDWendi__Z5ClampIfET_S0_S0_S0__285_1:
	// out.c2 (%f18): composite the blend with %f48/%f50/%f53.
	.loc	22	480	0
	mul.ftz.f32 	%f112, %f104, %f8;
	fma.rn.ftz.f32 	%f113, %f3, %f48, %f112;
	mul.ftz.f32 	%f114, %f50, %f113;
	fma.rn.ftz.f32 	%f18, %f7, %f53, %f114;
$Lt_108_38146:
	// Store the result pixel (%f20, %f19, %f18, %f14) at index
	// %rd13 = x + inDestPitch * y in inDest, in the format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_VividLight_Kernel_inDest];
	@!%p2 bra 	$Lt_108_38914;
	// f16 path: narrow each component with round-to-nearest, store 4 x 16 bit.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	480	0
	bra.uni 	$Lt_108_38658;
$Lt_108_38914:
	// f32 path: store the four floats unchanged (16 bytes per pixel).
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_108_38658:
$Lt_108_36610:
	// Common exit for in-range and out-of-range threads.
	.loc	22	480	0
	exit;
$LDWend_BlendMode_IR_BlendMode_VividLight_Kernel:
	} // BlendMode_IR_BlendMode_VividLight_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_LinearLight_Kernel
	//
	// Each thread handles one 4-component element at
	//   x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y
	// (both compares are signed).
	//
	// Parameters:
	//   inSrc0 / inSrcPitch0   first input buffer; element index = x + y * pitch
	//   inSrc1 / inSrcPitch1   second input buffer, indexed the same way
	//   inDest / inDestPitch   output buffer, indexed the same way
	//   inDeviceFormat         == 0 : elements are 4 x f16 (8 bytes each)
	//                          != 0 : elements are 4 x f32 (16 bytes each)
	//   inWidth, inHeight      exclusive upper bounds for x and y
	//   inAlphaGain            multiplies component 3 of the inSrc0 element
	//
	// Per element (s = inSrc0 element, d = inSrc1 element, c in 0..2):
	//   a    = inAlphaGain * s[3]
	//   outA = a + d[3] - a * d[3]
	//   if outA - 8e-6 <= 0:  all four outputs are 0
	//   else:
	//     k      = a * rcp(outA)                     (approximate reciprocal)
	//     blend  = 2 * clamp(s[c], 0, 1) + clamp(d[c], 0, 1) - 1
	//     out[c] = d[c] * (1 - k) + k * (s[c] * (1 - d[3]) + d[3] * blend)
	//     out[3] = outA
	// All float math uses .ftz (denormals flushed to zero).
	// NOTE(review): component 3 is presumably alpha (inferred only from the
	// inAlphaGain name); channel order of components 0..2 is not visible here.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_LinearLight_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<74>;
	.reg .pred %p<5>;
	.loc	22	481	0
$LDWbegin_BlendMode_IR_BlendMode_LinearLight_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/0, neg turns that into 1/0;
	//     %r17 is non-zero only when (inWidth > x) and (inHeight > y) ---
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of bounds: go straight to exit.
	@%p1 bra 	$Lt_109_25858;
	// %p2 = (inDeviceFormat == 0); selects the f16 path for every load/store below.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// --- Load inSrc0 element into %f1..%f4; element index %rd1 = x + y * inSrcPitch0 ---
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrc0];
	@!%p2 bra 	$Lt_109_26626;
	.loc	21	115	0
	// f16 path: 8-byte element, four 16-bit values widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	481	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_109_26370;
$Lt_109_26626:
	// f32 path: 16-byte element loaded directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_109_26370:
	// --- Load inSrc1 element into %f5..%f8; element index %rd7 = x + y * inSrcPitch1 ---
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inSrc1];
	@!%p2 bra 	$Lt_109_27138;
	.loc	21	115	0
	// f16 path.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	481	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_109_26882;
$Lt_109_27138:
	// f32 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_109_26882:
	// --- Component 3 of the result:
	//     %f10 = inAlphaGain * s[3];  %f13 = %f10 + d[3] - %f10 * d[3];  %f14 = %f13 ---
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If %f13 - 8e-6 <= 0, write an all-zero element; this also keeps the
	// reciprocal below away from a zero/near-zero input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_109_27650;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_109_27394;
$Lt_109_27650:
	// --- Component 0 (result in %f20), interleaved with the shared factors:
	//     %f24 = 1 - d[3]
	//     %f27 = rcp(%f13)            (approximate)
	//     %f28 = %f10 * %f27          (k)
	//     %f31 = 1 - %f10 * %f27      (1 - k; %f30 recomputes the same product as %f28)
	//     %f39 = 2 * clamp(s[0],0,1) + clamp(d[0],0,1) - 1
	//     %f20 = d[0] * %f31 + %f28 * (s[0] * %f24 + d[3] * %f39) ---
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f3f800000;    	// 1
	sub.ftz.f32 	%f24, %f23, %f8;
	mov.f32 	%f25, 0f3f800000;    	// 1
	min.ftz.f32 	%f26, %f22, %f25;
	rcp.approx.ftz.f32 	%f27, %f13;
	mul.ftz.f32 	%f28, %f10, %f27;
	mov.f32 	%f29, 0f3f800000;    	// 1
	mul.ftz.f32 	%f30, %f10, %f27;
	sub.ftz.f32 	%f31, %f29, %f30;
	add.ftz.f32 	%f32, %f26, %f26;
	mov.f32 	%f33, 0f00000000;    	// 0
	max.ftz.f32 	%f34, %f5, %f33;
	mov.f32 	%f35, 0f3f800000;    	// 1
	min.ftz.f32 	%f36, %f34, %f35;
	add.ftz.f32 	%f37, %f32, %f36;
	mov.f32 	%f38, 0fbf800000;    	// -1
	add.ftz.f32 	%f39, %f37, %f38;
	mul.ftz.f32 	%f40, %f8, %f39;
	fma.rn.ftz.f32 	%f41, %f1, %f24, %f40;
	mul.ftz.f32 	%f42, %f28, %f41;
	fma.rn.ftz.f32 	%f20, %f5, %f31, %f42;
	// --- Component 1 (result in %f19): same formula on s[1] = %f2, d[1] = %f6 ---
	mov.f32 	%f43, 0f00000000;    	// 0
	max.ftz.f32 	%f44, %f2, %f43;
	mov.f32 	%f45, 0f3f800000;    	// 1
	min.ftz.f32 	%f46, %f44, %f45;
	add.ftz.f32 	%f47, %f46, %f46;
	mov.f32 	%f48, 0f00000000;    	// 0
	max.ftz.f32 	%f49, %f6, %f48;
	mov.f32 	%f50, 0f3f800000;    	// 1
	min.ftz.f32 	%f51, %f49, %f50;
	add.ftz.f32 	%f52, %f47, %f51;
	mov.f32 	%f53, 0fbf800000;    	// -1
	add.ftz.f32 	%f54, %f52, %f53;
	mul.ftz.f32 	%f55, %f8, %f54;
	fma.rn.ftz.f32 	%f56, %f2, %f24, %f55;
	mul.ftz.f32 	%f57, %f28, %f56;
	fma.rn.ftz.f32 	%f19, %f6, %f31, %f57;
	// --- Component 2 (result in %f18): same formula on s[2] = %f3, d[2] = %f7 ---
	mov.f32 	%f58, 0f00000000;    	// 0
	max.ftz.f32 	%f59, %f3, %f58;
	mov.f32 	%f60, 0f3f800000;    	// 1
	min.ftz.f32 	%f61, %f59, %f60;
	add.ftz.f32 	%f62, %f61, %f61;
	mov.f32 	%f63, 0f00000000;    	// 0
	max.ftz.f32 	%f64, %f7, %f63;
	mov.f32 	%f65, 0f3f800000;    	// 1
	min.ftz.f32 	%f66, %f64, %f65;
	add.ftz.f32 	%f67, %f62, %f66;
	mov.f32 	%f68, 0fbf800000;    	// -1
	add.ftz.f32 	%f69, %f67, %f68;
	mul.ftz.f32 	%f70, %f8, %f69;
	fma.rn.ftz.f32 	%f71, %f3, %f24, %f70;
	mul.ftz.f32 	%f72, %f28, %f71;
	fma.rn.ftz.f32 	%f18, %f7, %f31, %f72;
$Lt_109_27394:
	// --- Store {%f20,%f19,%f18,%f14} to inDest; element index %rd13 = x + y * inDestPitch ---
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_LinearLight_Kernel_inDest];
	@!%p2 bra 	$Lt_109_28162;
	.loc	21	126	0
	// f16 path: narrow each component (round-to-nearest) and store 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	481	0
	bra.uni 	$Lt_109_27906;
$Lt_109_28162:
	.loc	21	126	0
	// f32 path: store 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_109_27906:
$Lt_109_25858:
	.loc	22	481	0
	exit;
$LDWend_BlendMode_IR_BlendMode_LinearLight_Kernel:
	} // BlendMode_IR_BlendMode_LinearLight_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_PinLight_Kernel
	//
	// Each thread handles one 4-component element at
	//   x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y
	// (both compares are signed).
	//
	// Parameters:
	//   inSrc0 / inSrcPitch0   first input buffer; element index = x + y * pitch
	//   inSrc1 / inSrcPitch1   second input buffer, indexed the same way
	//   inDest / inDestPitch   output buffer, indexed the same way
	//   inDeviceFormat         == 0 : elements are 4 x f16 (8 bytes each)
	//                          != 0 : elements are 4 x f32 (16 bytes each)
	//   inWidth, inHeight      exclusive upper bounds for x and y
	//   inAlphaGain            multiplies component 3 of the inSrc0 element
	//
	// Per element (s = inSrc0 element, d = inSrc1 element, c in 0..2):
	//   a    = inAlphaGain * s[3]
	//   outA = a + d[3] - a * d[3]
	//   if outA - 8e-6 <= 0:  all four outputs are 0
	//   else:
	//     k      = a * rcp(outA)                     (approximate reciprocal)
	//     sc     = clamp(s[c], 0, 1);  dc = clamp(d[c], 0, 1)
	//     blend  = 2*sc - 1   if dc < 2*sc - 1
	//              2*sc       else if dc > 2*sc
	//              dc         otherwise
	//     out[c] = d[c] * (1 - k) + k * (s[c] * (1 - d[3]) + d[3] * blend)
	//     out[3] = outA
	// All float math uses .ftz (denormals flushed to zero).
	// NOTE(review): component 3 is presumably alpha (inferred only from the
	// inAlphaGain name); channel order of components 0..2 is not visible here.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_PinLight_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<74>;
	.reg .pred %p<11>;
	.loc	22	482	0
$LDWbegin_BlendMode_IR_BlendMode_PinLight_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/0, neg turns that into 1/0;
	//     %r17 is non-zero only when (inWidth > x) and (inHeight > y) ---
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of bounds: go straight to exit.
	@%p1 bra 	$Lt_110_33538;
	// %p2 = (inDeviceFormat == 0); selects the f16 path for every load/store below.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// --- Load inSrc0 element into %f1..%f4; element index %rd1 = x + y * inSrcPitch0 ---
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrc0];
	@!%p2 bra 	$Lt_110_34306;
	.loc	21	115	0
	// f16 path: 8-byte element, four 16-bit values widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	482	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_110_34050;
$Lt_110_34306:
	// f32 path: 16-byte element loaded directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_110_34050:
	// --- Load inSrc1 element into %f5..%f8; element index %rd7 = x + y * inSrcPitch1 ---
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inSrc1];
	@!%p2 bra 	$Lt_110_34818;
	.loc	21	115	0
	// f16 path.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	482	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_110_34562;
$Lt_110_34818:
	// f32 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_110_34562:
	// --- Component 3 of the result:
	//     %f10 = inAlphaGain * s[3];  %f13 = %f10 + d[3] - %f10 * d[3];  %f14 = %f13 ---
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If %f13 - 8e-6 <= 0, write an all-zero element; this also keeps the
	// reciprocal below away from a zero/near-zero input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_110_35330;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_110_35074;
$Lt_110_35330:
	.loc	22	450	0
	// --- Component 0 blend term (%f32):
	//     %f26 = clamp(s[0],0,1), %f28 = clamp(d[0],0,1),
	//     %f29 = 2 * %f26, %f31 = 2 * %f26 - 1 ---
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f5, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	min.ftz.f32 	%f26, %f22, %f25;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f24, %f27;
	add.ftz.f32 	%f29, %f26, %f26;
	mov.f32 	%f30, 0fbf800000;    	// -1
	add.ftz.f32 	%f31, %f29, %f30;
	// Below the lower bound (%f28 < %f31): take the lower bound.
	setp.lt.ftz.f32 	%p4, %f28, %f31;
	@!%p4 bra 	$Lt_110_30210;
	.loc	22	452	0
	mov.f32 	%f32, %f31;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_5;
$Lt_110_30210:
	.loc	22	454	0
	// Above the upper bound (%f28 > %f29): take the upper bound.
	setp.gt.ftz.f32 	%p5, %f28, %f29;
	@!%p5 bra 	$Lt_110_30466;
	.loc	22	456	0
	mov.f32 	%f32, %f29;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_5;
$Lt_110_30466:
	.loc	22	460	0
	// Otherwise keep the clamped d[0].
	mov.f32 	%f32, %f28;
$LDWendi__Z5ClampIfET_S0_S0_S0__287_5:
	.loc	22	482	0
	// --- Shared factors, then component 0 result (%f20):
	//     %f34 = 1 - d[3]
	//     %f35 = rcp(%f13)            (approximate)
	//     %f36 = %f10 * %f35          (k)
	//     %f39 = 1 - %f10 * %f35      (1 - k; %f38 recomputes the same product as %f36)
	//     %f20 = d[0] * %f39 + %f36 * (s[0] * %f34 + %f32 * d[3]) ---
	mov.f32 	%f33, 0f3f800000;    	// 1
	sub.ftz.f32 	%f34, %f33, %f8;
	rcp.approx.ftz.f32 	%f35, %f13;
	mul.ftz.f32 	%f36, %f10, %f35;
	mov.f32 	%f37, 0f3f800000;    	// 1
	mul.ftz.f32 	%f38, %f10, %f35;
	sub.ftz.f32 	%f39, %f37, %f38;
	mul.ftz.f32 	%f40, %f32, %f8;
	fma.rn.ftz.f32 	%f41, %f1, %f34, %f40;
	mul.ftz.f32 	%f42, %f36, %f41;
	fma.rn.ftz.f32 	%f20, %f5, %f39, %f42;
	.loc	22	450	0
	// --- Component 1 blend term (%f54): same selection on s[1] = %f2, d[1] = %f6 ---
	mov.f32 	%f43, 0f00000000;    	// 0
	max.ftz.f32 	%f44, %f2, %f43;
	mov.f32 	%f45, 0f00000000;    	// 0
	max.ftz.f32 	%f46, %f6, %f45;
	mov.f32 	%f47, 0f3f800000;    	// 1
	min.ftz.f32 	%f48, %f44, %f47;
	mov.f32 	%f49, 0f3f800000;    	// 1
	min.ftz.f32 	%f50, %f46, %f49;
	add.ftz.f32 	%f51, %f48, %f48;
	mov.f32 	%f52, 0fbf800000;    	// -1
	add.ftz.f32 	%f53, %f51, %f52;
	setp.lt.ftz.f32 	%p6, %f50, %f53;
	@!%p6 bra 	$Lt_110_30722;
	.loc	22	452	0
	mov.f32 	%f54, %f53;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_3;
$Lt_110_30722:
	.loc	22	454	0
	setp.gt.ftz.f32 	%p7, %f50, %f51;
	@!%p7 bra 	$Lt_110_30978;
	.loc	22	456	0
	mov.f32 	%f54, %f51;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_3;
$Lt_110_30978:
	.loc	22	460	0
	mov.f32 	%f54, %f50;
$LDWendi__Z5ClampIfET_S0_S0_S0__287_3:
	.loc	22	482	0
	// Component 1 result: %f19 = d[1] * %f39 + %f36 * (s[1] * %f34 + %f54 * d[3])
	mul.ftz.f32 	%f55, %f54, %f8;
	fma.rn.ftz.f32 	%f56, %f2, %f34, %f55;
	mul.ftz.f32 	%f57, %f36, %f56;
	fma.rn.ftz.f32 	%f19, %f6, %f39, %f57;
	.loc	22	450	0
	// --- Component 2 blend term (%f69): same selection on s[2] = %f3, d[2] = %f7 ---
	mov.f32 	%f58, 0f00000000;    	// 0
	max.ftz.f32 	%f59, %f3, %f58;
	mov.f32 	%f60, 0f00000000;    	// 0
	max.ftz.f32 	%f61, %f7, %f60;
	mov.f32 	%f62, 0f3f800000;    	// 1
	min.ftz.f32 	%f63, %f59, %f62;
	mov.f32 	%f64, 0f3f800000;    	// 1
	min.ftz.f32 	%f65, %f61, %f64;
	add.ftz.f32 	%f66, %f63, %f63;
	mov.f32 	%f67, 0fbf800000;    	// -1
	add.ftz.f32 	%f68, %f66, %f67;
	setp.lt.ftz.f32 	%p8, %f65, %f68;
	@!%p8 bra 	$Lt_110_31234;
	.loc	22	452	0
	mov.f32 	%f69, %f68;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_1;
$Lt_110_31234:
	.loc	22	454	0
	setp.gt.ftz.f32 	%p9, %f65, %f66;
	@!%p9 bra 	$Lt_110_31490;
	.loc	22	456	0
	mov.f32 	%f69, %f66;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__287_1;
$Lt_110_31490:
	.loc	22	460	0
	mov.f32 	%f69, %f65;
$LDWendi__Z5ClampIfET_S0_S0_S0__287_1:
	.loc	22	482	0
	// Component 2 result: %f18 = d[2] * %f39 + %f36 * (s[2] * %f34 + %f69 * d[3])
	mul.ftz.f32 	%f70, %f69, %f8;
	fma.rn.ftz.f32 	%f71, %f3, %f34, %f70;
	mul.ftz.f32 	%f72, %f36, %f71;
	fma.rn.ftz.f32 	%f18, %f7, %f39, %f72;
$Lt_110_35074:
	// --- Store {%f20,%f19,%f18,%f14} to inDest; element index %rd13 = x + y * inDestPitch ---
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_PinLight_Kernel_inDest];
	@!%p2 bra 	$Lt_110_35842;
	.loc	21	126	0
	// f16 path: narrow each component (round-to-nearest) and store 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	482	0
	bra.uni 	$Lt_110_35586;
$Lt_110_35842:
	.loc	21	126	0
	// f32 path: store 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_110_35586:
$Lt_110_33538:
	.loc	22	482	0
	exit;
$LDWend_BlendMode_IR_BlendMode_PinLight_Kernel:
	} // BlendMode_IR_BlendMode_PinLight_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_HardMix_Kernel
	//
	// Each thread handles one 4-component element at
	//   x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y
	// (both compares are signed).
	//
	// Parameters:
	//   inSrc0 / inSrcPitch0   first input buffer; element index = x + y * pitch
	//   inSrc1 / inSrcPitch1   second input buffer, indexed the same way
	//   inDest / inDestPitch   output buffer, indexed the same way
	//   inDeviceFormat         == 0 : elements are 4 x f16 (8 bytes each)
	//                          != 0 : elements are 4 x f32 (16 bytes each)
	//   inWidth, inHeight      exclusive upper bounds for x and y
	//   inAlphaGain            multiplies component 3 of the inSrc0 element
	//
	// Per element (s = inSrc0 element, d = inSrc1 element, c in 0..2):
	//   a    = inAlphaGain * s[3]
	//   outA = a + d[3] - a * d[3]
	//   if outA - 8e-6 <= 0:  all four outputs are 0
	//   else:
	//     k      = a * rcp(outA)                     (approximate reciprocal)
	//     blend  = (s[c] < 1 - d[c]) ? 0 : 1         (inputs are not clamped here)
	//     out[c] = d[c] * (1 - k) + k * (s[c] * (1 - d[3]) + d[3] * blend)
	//     out[3] = outA
	// All float math uses .ftz (denormals flushed to zero).
	// NOTE(review): component 3 is presumably alpha (inferred only from the
	// inAlphaGain name); channel order of components 0..2 is not visible here.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_HardMix_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<53>;
	.reg .pred %p<8>;
	.loc	22	483	0
$LDWbegin_BlendMode_IR_BlendMode_HardMix_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/0, neg turns that into 1/0;
	//     %r17 is non-zero only when (inWidth > x) and (inHeight > y) ---
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of bounds: go straight to exit.
	@%p1 bra 	$Lt_111_22018;
	// %p2 = (inDeviceFormat == 0); selects the f16 path for every load/store below.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// --- Load inSrc0 element into %f1..%f4; element index %rd1 = x + y * inSrcPitch0 ---
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrc0];
	@!%p2 bra 	$Lt_111_22786;
	.loc	21	115	0
	// f16 path: 8-byte element, four 16-bit values widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	483	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_111_22530;
$Lt_111_22786:
	// f32 path: 16-byte element loaded directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_111_22530:
	// --- Load inSrc1 element into %f5..%f8; element index %rd7 = x + y * inSrcPitch1 ---
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inSrc1];
	@!%p2 bra 	$Lt_111_23298;
	.loc	21	115	0
	// f16 path.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	483	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_111_23042;
$Lt_111_23298:
	// f32 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_111_23042:
	// --- Component 3 of the result:
	//     %f10 = inAlphaGain * s[3];  %f13 = %f10 + d[3] - %f10 * d[3];  %f14 = %f13 ---
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If %f13 - 8e-6 <= 0, write an all-zero element; this also keeps the
	// reciprocal below away from a zero/near-zero input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_111_23810;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_111_23554;
$Lt_111_23810:
	// --- Shared factors:
	//     %f22 = 1 - d[3]
	//     %f23 = rcp(%f13)            (approximate)
	//     %f24 = %f10 * %f23          (k)
	//     %f27 = 1 - %f10 * %f23      (1 - k; %f26 recomputes the same product as %f24) ---
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// --- Component 0 (result in %f20):
	//     %f32 = (s[0] < 1 - d[0]) ? 0 : 1   (selp picks its first source when %p4 is true)
	//     %f20 = d[0] * %f27 + %f24 * (s[0] * %f22 + %f32 * d[3]) ---
	mov.f32 	%f28, 0f00000000;    	// 0
	mov.f32 	%f29, 0f3f800000;    	// 1
	mov.f32 	%f30, 0f3f800000;    	// 1
	sub.ftz.f32 	%f31, %f30, %f5;
	setp.lt.ftz.f32 	%p4, %f1, %f31;
	selp.f32 	%f32, %f28, %f29, %p4;
	mul.ftz.f32 	%f33, %f32, %f8;
	fma.rn.ftz.f32 	%f34, %f1, %f22, %f33;
	mul.ftz.f32 	%f35, %f24, %f34;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f35;
	// --- Component 1 (result in %f19): same formula on s[1] = %f2, d[1] = %f6 ---
	mov.f32 	%f36, 0f00000000;    	// 0
	mov.f32 	%f37, 0f3f800000;    	// 1
	mov.f32 	%f38, 0f3f800000;    	// 1
	sub.ftz.f32 	%f39, %f38, %f6;
	setp.lt.ftz.f32 	%p5, %f2, %f39;
	selp.f32 	%f40, %f36, %f37, %p5;
	mul.ftz.f32 	%f41, %f40, %f8;
	fma.rn.ftz.f32 	%f42, %f2, %f22, %f41;
	mul.ftz.f32 	%f43, %f24, %f42;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f43;
	// --- Component 2 (result in %f18): same formula on s[2] = %f3, d[2] = %f7 ---
	mov.f32 	%f44, 0f00000000;    	// 0
	mov.f32 	%f45, 0f3f800000;    	// 1
	mov.f32 	%f46, 0f3f800000;    	// 1
	sub.ftz.f32 	%f47, %f46, %f7;
	setp.lt.ftz.f32 	%p6, %f3, %f47;
	selp.f32 	%f48, %f44, %f45, %p6;
	mul.ftz.f32 	%f49, %f48, %f8;
	fma.rn.ftz.f32 	%f50, %f3, %f22, %f49;
	mul.ftz.f32 	%f51, %f24, %f50;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f51;
$Lt_111_23554:
	// --- Store {%f20,%f19,%f18,%f14} to inDest; element index %rd13 = x + y * inDestPitch ---
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_HardMix_Kernel_inDest];
	@!%p2 bra 	$Lt_111_24322;
	.loc	21	126	0
	// f16 path: narrow each component (round-to-nearest) and store 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	483	0
	bra.uni 	$Lt_111_24066;
$Lt_111_24322:
	.loc	21	126	0
	// f32 path: store 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_111_24066:
$Lt_111_22018:
	.loc	22	483	0
	exit;
$LDWend_BlendMode_IR_BlendMode_HardMix_Kernel:
	} // BlendMode_IR_BlendMode_HardMix_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Difference_Kernel
	//
	// Each thread handles one 4-component element at
	//   x = ctaid.x * ntid.x + tid.x,   y = ctaid.y * ntid.y + tid.y
	// and exits without touching memory unless inWidth > x and inHeight > y
	// (both compares are signed).
	//
	// Parameters:
	//   inSrc0 / inSrcPitch0   first input buffer; element index = x + y * pitch
	//   inSrc1 / inSrcPitch1   second input buffer, indexed the same way
	//   inDest / inDestPitch   output buffer, indexed the same way
	//   inDeviceFormat         == 0 : elements are 4 x f16 (8 bytes each)
	//                          != 0 : elements are 4 x f32 (16 bytes each)
	//   inWidth, inHeight      exclusive upper bounds for x and y
	//   inAlphaGain            multiplies component 3 of the inSrc0 element
	//
	// Per element (s = inSrc0 element, d = inSrc1 element, c in 0..2):
	//   a    = inAlphaGain * s[3]
	//   outA = a + d[3] - a * d[3]
	//   if outA - 8e-6 <= 0:  all four outputs are 0
	//   else:
	//     k      = a * rcp(outA)                     (approximate reciprocal)
	//     blend  = |s[c] - d[c]|                     (inputs are not clamped here)
	//     out[c] = d[c] * (1 - k) + k * (s[c] * (1 - d[3]) + d[3] * blend)
	//     out[3] = outA
	// All float math uses .ftz (denormals flushed to zero).
	// NOTE(review): component 3 is presumably alpha (inferred only from the
	// inAlphaGain name); channel order of components 0..2 is not visible here.
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Difference_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<44>;
	.reg .pred %p<5>;
	.loc	22	484	0
$LDWbegin_BlendMode_IR_BlendMode_Difference_Kernel:
	// --- Thread coordinates: %r8 = x, %r10 = y ---
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// --- Bounds check: set.gt yields all-ones/0, neg turns that into 1/0;
	//     %r17 is non-zero only when (inWidth > x) and (inHeight > y) ---
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of bounds: go straight to exit.
	@%p1 bra 	$Lt_112_19714;
	// %p2 = (inDeviceFormat == 0); selects the f16 path for every load/store below.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// --- Load inSrc0 element into %f1..%f4; element index %rd1 = x + y * inSrcPitch0 ---
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrc0];
	@!%p2 bra 	$Lt_112_20482;
	.loc	21	115	0
	// f16 path: 8-byte element, four 16-bit values widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	484	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_112_20226;
$Lt_112_20482:
	// f32 path: 16-byte element loaded directly.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_112_20226:
	// --- Load inSrc1 element into %f5..%f8; element index %rd7 = x + y * inSrcPitch1 ---
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inSrc1];
	@!%p2 bra 	$Lt_112_20994;
	.loc	21	115	0
	// f16 path.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	484	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_112_20738;
$Lt_112_20994:
	// f32 path.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_112_20738:
	// --- Component 3 of the result:
	//     %f10 = inAlphaGain * s[3];  %f13 = %f10 + d[3] - %f10 * d[3];  %f14 = %f13 ---
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If %f13 - 8e-6 <= 0, write an all-zero element; this also keeps the
	// reciprocal below away from a zero/near-zero input.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_112_21506;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_112_21250;
$Lt_112_21506:
	// --- Shared factors:
	//     %f22 = 1 - d[3]
	//     %f23 = rcp(%f13)            (approximate)
	//     %f24 = %f10 * %f23          (k)
	//     %f27 = 1 - %f10 * %f23      (1 - k; %f26 recomputes the same product as %f24) ---
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// --- Component 0: %f20 = d[0] * %f27 + %f24 * (s[0] * %f22 + d[3] * |s[0] - d[0]|) ---
	sub.ftz.f32 	%f28, %f1, %f5;
	abs.ftz.f32 	%f29, %f28;
	mul.ftz.f32 	%f30, %f8, %f29;
	fma.rn.ftz.f32 	%f31, %f1, %f22, %f30;
	mul.ftz.f32 	%f32, %f24, %f31;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f32;
	// --- Component 1: %f19, same formula on s[1] = %f2, d[1] = %f6 ---
	sub.ftz.f32 	%f33, %f2, %f6;
	abs.ftz.f32 	%f34, %f33;
	mul.ftz.f32 	%f35, %f8, %f34;
	fma.rn.ftz.f32 	%f36, %f2, %f22, %f35;
	mul.ftz.f32 	%f37, %f24, %f36;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f37;
	// --- Component 2: %f18, same formula on s[2] = %f3, d[2] = %f7 ---
	sub.ftz.f32 	%f38, %f3, %f7;
	abs.ftz.f32 	%f39, %f38;
	mul.ftz.f32 	%f40, %f8, %f39;
	fma.rn.ftz.f32 	%f41, %f3, %f22, %f40;
	mul.ftz.f32 	%f42, %f24, %f41;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f42;
$Lt_112_21250:
	// --- Store {%f20,%f19,%f18,%f14} to inDest; element index %rd13 = x + y * inDestPitch ---
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Difference_Kernel_inDest];
	@!%p2 bra 	$Lt_112_22018;
	.loc	21	126	0
	// f16 path: narrow each component (round-to-nearest) and store 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	484	0
	bra.uni 	$Lt_112_21762;
$Lt_112_22018:
	.loc	21	126	0
	// f32 path: store 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_112_21762:
$Lt_112_19714:
	.loc	22	484	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Difference_Kernel:
	} // BlendMode_IR_BlendMode_Difference_Kernel

	.entry BlendMode_IR_BlendMode_Exclusion_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<86>;
	.reg .pred %p<5>;
	.loc	22	485	0
$LDWbegin_BlendMode_IR_BlendMode_Exclusion_Kernel:
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_113_28930;
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrc0];
	@!%p2 bra 	$Lt_113_29698;
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	485	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_113_29442;
$Lt_113_29698:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_113_29442:
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inSrc1];
	@!%p2 bra 	$Lt_113_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	485	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_113_29954;
$Lt_113_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_113_29954:
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_113_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_113_30466;
$Lt_113_30722:
	mov.f32 	%f21, 0f00000000;    	// 0
	max.ftz.f32 	%f22, %f1, %f21;
	mov.f32 	%f23, 0f00000000;    	// 0
	max.ftz.f32 	%f24, %f5, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	sub.ftz.f32 	%f26, %f25, %f8;
	mov.f32 	%f27, 0f3f800000;    	// 1
	min.ftz.f32 	%f28, %f22, %f27;
	mov.f32 	%f29, 0f3f800000;    	// 1
	min.ftz.f32 	%f30, %f24, %f29;
	rcp.approx.ftz.f32 	%f31, %f13;
	mul.ftz.f32 	%f32, %f10, %f31;
	mov.f32 	%f33, 0f3f800000;    	// 1
	mul.ftz.f32 	%f34, %f10, %f31;
	sub.ftz.f32 	%f35, %f33, %f34;
	add.ftz.f32 	%f36, %f28, %f30;
	add.ftz.f32 	%f37, %f28, %f28;
	mul.ftz.f32 	%f38, %f30, %f37;
	sub.ftz.f32 	%f39, %f36, %f38;
	mov.f32 	%f40, 0f00000000;    	// 0
	max.ftz.f32 	%f41, %f39, %f40;
	mov.f32 	%f42, 0f3f800000;    	// 1
	min.ftz.f32 	%f43, %f41, %f42;
	mul.ftz.f32 	%f44, %f8, %f43;
	fma.rn.ftz.f32 	%f45, %f1, %f26, %f44;
	mul.ftz.f32 	%f46, %f32, %f45;
	fma.rn.ftz.f32 	%f20, %f5, %f35, %f46;
	mov.f32 	%f47, 0f00000000;    	// 0
	max.ftz.f32 	%f48, %f2, %f47;
	mov.f32 	%f49, 0f00000000;    	// 0
	max.ftz.f32 	%f50, %f6, %f49;
	mov.f32 	%f51, 0f3f800000;    	// 1
	min.ftz.f32 	%f52, %f48, %f51;
	mov.f32 	%f53, 0f3f800000;    	// 1
	min.ftz.f32 	%f54, %f50, %f53;
	add.ftz.f32 	%f55, %f52, %f54;
	add.ftz.f32 	%f56, %f52, %f52;
	mul.ftz.f32 	%f57, %f54, %f56;
	sub.ftz.f32 	%f58, %f55, %f57;
	mov.f32 	%f59, 0f00000000;    	// 0
	max.ftz.f32 	%f60, %f58, %f59;
	mov.f32 	%f61, 0f3f800000;    	// 1
	min.ftz.f32 	%f62, %f60, %f61;
	mul.ftz.f32 	%f63, %f8, %f62;
	fma.rn.ftz.f32 	%f64, %f2, %f26, %f63;
	mul.ftz.f32 	%f65, %f32, %f64;
	fma.rn.ftz.f32 	%f19, %f6, %f35, %f65;
	mov.f32 	%f66, 0f00000000;    	// 0
	max.ftz.f32 	%f67, %f3, %f66;
	mov.f32 	%f68, 0f00000000;    	// 0
	max.ftz.f32 	%f69, %f7, %f68;
	mov.f32 	%f70, 0f3f800000;    	// 1
	min.ftz.f32 	%f71, %f67, %f70;
	mov.f32 	%f72, 0f3f800000;    	// 1
	min.ftz.f32 	%f73, %f69, %f72;
	add.ftz.f32 	%f74, %f71, %f73;
	add.ftz.f32 	%f75, %f71, %f71;
	mul.ftz.f32 	%f76, %f73, %f75;
	sub.ftz.f32 	%f77, %f74, %f76;
	mov.f32 	%f78, 0f00000000;    	// 0
	max.ftz.f32 	%f79, %f77, %f78;
	mov.f32 	%f80, 0f3f800000;    	// 1
	min.ftz.f32 	%f81, %f79, %f80;
	mul.ftz.f32 	%f82, %f8, %f81;
	fma.rn.ftz.f32 	%f83, %f3, %f26, %f82;
	mul.ftz.f32 	%f84, %f32, %f83;
	fma.rn.ftz.f32 	%f18, %f7, %f35, %f84;
$Lt_113_30466:
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Exclusion_Kernel_inDest];
	@!%p2 bra 	$Lt_113_31234;
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	485	0
	bra.uni 	$Lt_113_30978;
$Lt_113_31234:
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_113_30978:
$Lt_113_28930:
	.loc	22	485	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Exclusion_Kernel:
	} // BlendMode_IR_BlendMode_Exclusion_Kernel

	// BlendMode_IR_BlendMode_Subtract_Kernel
	//
	// One thread per pixel.  Reads one 4-component pixel from inSrc0 and one
	// from inSrc1, blends the first three components with a clamped
	// difference (Src1 - Src0), and writes one 4-component pixel to inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0, inSrc1, inDest  64-bit global addresses of the pixel buffers.
	//   inSrcPitch0/1, inDestPitch
	//                           row pitch in pixels: the element index is
	//                           x + pitch*y and is scaled by the pixel size
	//                           (8 or 16 bytes) only afterwards.
	//   inDeviceFormat          0  -> pixels are 4 x 16-bit half floats (8 bytes)
	//                           !0 -> pixels are 4 x f32 (16 bytes)
	//   inWidth, inHeight       threads with x >= inWidth or y >= inHeight
	//                           exit before any global memory access.
	//   inAlphaGain             multiplies the 4th component of the Src0 pixel.
	//
	// Math, with s = Src0 pixel (%f1..%f4), d = Src1 pixel (%f5..%f8):
	//   a   = inAlphaGain * s[3]
	//   o3  = a + d[3] - a*d[3]                     (4th output component)
	//   if (o3 - 8e-6 <= 0)  output = {0,0,0,0}
	//   else, for c in 0..2:
	//     w    = a / o3                             (via rcp.approx)
	//     diff = clamp(clamp(d[c],0,1) - clamp(s[c],0,1), 0, 1)
	//     t    = s[c]*(1 - d[3]) + d[3]*diff
	//     o[c] = d[c]*(1 - w) + w*t
	// NOTE(review): the 4th component is presumably alpha and Src1 presumably
	// the backdrop layer -- inferred from names only, confirm against the
	// CUDA source.
	.entry BlendMode_IR_BlendMode_Subtract_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<77>;
	.reg .pred %p<5>;
	.loc	22	486	0
$LDWbegin_BlendMode_IR_BlendMode_Subtract_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): set.gt yields all-ones when true, neg
	// turns that into 1, and the AND is zero when either test fails.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of range: jump straight to exit.
	@%p1 bra 	$Lt_114_28930;
	// %p2 = (inDeviceFormat == 0); reused for both loads and for the store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = sign-extended 32-bit pixel index x + inSrcPitch0*y into Src0.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrc0];
	@!%p2 bra 	$Lt_114_29698;
	// 16-bit path: load 4 x u16 (8 bytes/pixel) and widen each half float
	// to f32, giving %f1..%f4.
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	486	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_114_29442;
$Lt_114_29698:
	// 32-bit path: load 4 x f32 (16 bytes/pixel) directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_114_29442:
	// Same load sequence for Src1 using inSrcPitch1; result in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inSrc1];
	@!%p2 bra 	$Lt_114_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	486	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_114_29954;
$Lt_114_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_114_29954:
	// %f10 = a  = inAlphaGain * s[3]
	// %f13 = o3 = a + d[3] - a*d[3]; %f14 carries it to the store.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If o3 - 8e-6 <= 0 the reciprocal below is skipped and the whole
	// output pixel is forced to zero.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_114_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_114_30466;
$Lt_114_30722:
	// Shared factors: %f22 = 1 - d[3]; %f24 = w = a * (1/o3);
	// %f27 = 1 - w.  (%f26 recomputes the same product as %f24.)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: diff = clamp(clamp(d0,0,1) - clamp(s0,0,1), 0, 1)
	//              %f20 = d0*(1-w) + w*(s0*(1-d[3]) + d[3]*diff)
	mov.f32 	%f28, 0f00000000;    	// 0
	max.ftz.f32 	%f29, %f5, %f28;
	mov.f32 	%f30, 0f3f800000;    	// 1
	min.ftz.f32 	%f31, %f29, %f30;
	mov.f32 	%f32, 0f00000000;    	// 0
	max.ftz.f32 	%f33, %f1, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f33, %f34;
	sub.ftz.f32 	%f36, %f31, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f36, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mul.ftz.f32 	%f41, %f8, %f40;
	fma.rn.ftz.f32 	%f42, %f1, %f22, %f41;
	mul.ftz.f32 	%f43, %f24, %f42;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f43;
	// Component 1: same formula on s1/d1 (%f2/%f6) -> %f19.
	mov.f32 	%f44, 0f00000000;    	// 0
	max.ftz.f32 	%f45, %f6, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	min.ftz.f32 	%f47, %f45, %f46;
	mov.f32 	%f48, 0f00000000;    	// 0
	max.ftz.f32 	%f49, %f2, %f48;
	mov.f32 	%f50, 0f3f800000;    	// 1
	min.ftz.f32 	%f51, %f49, %f50;
	sub.ftz.f32 	%f52, %f47, %f51;
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f52, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	min.ftz.f32 	%f56, %f54, %f55;
	mul.ftz.f32 	%f57, %f8, %f56;
	fma.rn.ftz.f32 	%f58, %f2, %f22, %f57;
	mul.ftz.f32 	%f59, %f24, %f58;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f59;
	// Component 2: same formula on s2/d2 (%f3/%f7) -> %f18.
	mov.f32 	%f60, 0f00000000;    	// 0
	max.ftz.f32 	%f61, %f7, %f60;
	mov.f32 	%f62, 0f3f800000;    	// 1
	min.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, 0f00000000;    	// 0
	max.ftz.f32 	%f65, %f3, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f65, %f66;
	sub.ftz.f32 	%f68, %f63, %f67;
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f68, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f70, %f71;
	mul.ftz.f32 	%f73, %f8, %f72;
	fma.rn.ftz.f32 	%f74, %f3, %f22, %f73;
	mul.ftz.f32 	%f75, %f24, %f74;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f75;
$Lt_114_30466:
	// Store {%f20,%f19,%f18,%f14} at pixel index x + inDestPitch*y, in the
	// same format that was selected for the loads (%p2).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Subtract_Kernel_inDest];
	@!%p2 bra 	$Lt_114_31234;
	// 16-bit path: round each f32 to half (round-to-nearest) and store
	// 4 x u16.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	486	0
	bra.uni 	$Lt_114_30978;
$Lt_114_31234:
	// 32-bit path: store 4 x f32.
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_114_30978:
$Lt_114_28930:
	.loc	22	486	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Subtract_Kernel:
	} // BlendMode_IR_BlendMode_Subtract_Kernel

	// BlendMode_IR_BlendMode_Divide_Kernel
	//
	// One thread per pixel.  Reads one 4-component pixel from inSrc0 and one
	// from inSrc1, blends the first three components with a clamped
	// quotient (Src1 / Src0), and writes one 4-component pixel to inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0, inSrc1, inDest  64-bit global addresses of the pixel buffers.
	//   inSrcPitch0/1, inDestPitch
	//                           row pitch in pixels: the element index is
	//                           x + pitch*y and is scaled by the pixel size
	//                           (8 or 16 bytes) only afterwards.
	//   inDeviceFormat          0  -> pixels are 4 x 16-bit half floats (8 bytes)
	//                           !0 -> pixels are 4 x f32 (16 bytes)
	//   inWidth, inHeight       threads with x >= inWidth or y >= inHeight
	//                           exit before any global memory access.
	//   inAlphaGain             multiplies the 4th component of the Src0 pixel.
	//
	// Math, with s = Src0 pixel (%f1..%f4), d = Src1 pixel (%f5..%f8):
	//   a   = inAlphaGain * s[3]
	//   o3  = a + d[3] - a*d[3]                     (4th output component)
	//   if (o3 - 8e-6 <= 0)  output = {0,0,0,0}
	//   else, for c in 0..2:
	//     w    = a / o3                             (via rcp.approx)
	//     q    = clamp(clamp(d[c],0,1) / min(max(s[c],1e-7),1), 0, 1)
	//                                               (via div.approx)
	//     t    = s[c]*(1 - d[3]) + d[3]*q
	//     o[c] = d[c]*(1 - w) + w*t
	// NOTE(review): the 4th component is presumably alpha and Src1 presumably
	// the backdrop layer -- inferred from names only, confirm against the
	// CUDA source.
	.entry BlendMode_IR_BlendMode_Divide_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<77>;
	.reg .pred %p<5>;
	.loc	22	487	0
$LDWbegin_BlendMode_IR_BlendMode_Divide_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): set.gt yields all-ones when true, neg
	// turns that into 1, and the AND is zero when either test fails.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of range: jump straight to exit.
	@%p1 bra 	$Lt_115_28930;
	// %p2 = (inDeviceFormat == 0); reused for both loads and for the store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = sign-extended 32-bit pixel index x + inSrcPitch0*y into Src0.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrc0];
	@!%p2 bra 	$Lt_115_29698;
	// 16-bit path: load 4 x u16 (8 bytes/pixel) and widen each half float
	// to f32, giving %f1..%f4.
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	487	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_115_29442;
$Lt_115_29698:
	// 32-bit path: load 4 x f32 (16 bytes/pixel) directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_115_29442:
	// Same load sequence for Src1 using inSrcPitch1; result in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inSrc1];
	@!%p2 bra 	$Lt_115_30210;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	487	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_115_29954;
$Lt_115_30210:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_115_29954:
	// %f10 = a  = inAlphaGain * s[3]
	// %f13 = o3 = a + d[3] - a*d[3]; %f14 carries it to the store.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	add.ftz.f32 	%f11, %f10, %f8;
	mul.ftz.f32 	%f12, %f10, %f8;
	sub.ftz.f32 	%f13, %f11, %f12;
	mov.f32 	%f14, %f13;
	// If o3 - 8e-6 <= 0 the reciprocal below is skipped and the whole
	// output pixel is forced to zero.
	mov.f32 	%f15, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f16, %f13, %f15;
	mov.f32 	%f17, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f16, %f17;
	@!%p3 bra 	$Lt_115_30722;
	mov.f32 	%f18, 0f00000000;    	// 0
	mov.f32 	%f19, 0f00000000;    	// 0
	mov.f32 	%f20, 0f00000000;    	// 0
	mov.f32 	%f14, 0f00000000;    	// 0
	bra.uni 	$Lt_115_30466;
$Lt_115_30722:
	// Shared factors: %f22 = 1 - d[3]; %f24 = w = a * (1/o3);
	// %f27 = 1 - w.  (%f26 recomputes the same product as %f24.)
	mov.f32 	%f21, 0f3f800000;    	// 1
	sub.ftz.f32 	%f22, %f21, %f8;
	rcp.approx.ftz.f32 	%f23, %f13;
	mul.ftz.f32 	%f24, %f10, %f23;
	mov.f32 	%f25, 0f3f800000;    	// 1
	mul.ftz.f32 	%f26, %f10, %f23;
	sub.ftz.f32 	%f27, %f25, %f26;
	// Component 0: q = clamp(clamp(d0,0,1) / min(max(s0,1e-7),1), 0, 1)
	//              %f20 = d0*(1-w) + w*(s0*(1-d[3]) + d[3]*q)
	// The 1e-7 lower bound keeps the divisor strictly positive.
	mov.f32 	%f28, 0f00000000;    	// 0
	max.ftz.f32 	%f29, %f5, %f28;
	mov.f32 	%f30, 0f3f800000;    	// 1
	min.ftz.f32 	%f31, %f29, %f30;
	mov.f32 	%f32, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f33, %f1, %f32;
	mov.f32 	%f34, 0f3f800000;    	// 1
	min.ftz.f32 	%f35, %f33, %f34;
	div.approx.ftz.f32 	%f36, %f31, %f35;
	mov.f32 	%f37, 0f00000000;    	// 0
	max.ftz.f32 	%f38, %f36, %f37;
	mov.f32 	%f39, 0f3f800000;    	// 1
	min.ftz.f32 	%f40, %f38, %f39;
	mul.ftz.f32 	%f41, %f8, %f40;
	fma.rn.ftz.f32 	%f42, %f1, %f22, %f41;
	mul.ftz.f32 	%f43, %f24, %f42;
	fma.rn.ftz.f32 	%f20, %f5, %f27, %f43;
	// Component 1: same formula on s1/d1 (%f2/%f6) -> %f19.
	mov.f32 	%f44, 0f00000000;    	// 0
	max.ftz.f32 	%f45, %f6, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	min.ftz.f32 	%f47, %f45, %f46;
	mov.f32 	%f48, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f49, %f2, %f48;
	mov.f32 	%f50, 0f3f800000;    	// 1
	min.ftz.f32 	%f51, %f49, %f50;
	div.approx.ftz.f32 	%f52, %f47, %f51;
	mov.f32 	%f53, 0f00000000;    	// 0
	max.ftz.f32 	%f54, %f52, %f53;
	mov.f32 	%f55, 0f3f800000;    	// 1
	min.ftz.f32 	%f56, %f54, %f55;
	mul.ftz.f32 	%f57, %f8, %f56;
	fma.rn.ftz.f32 	%f58, %f2, %f22, %f57;
	mul.ftz.f32 	%f59, %f24, %f58;
	fma.rn.ftz.f32 	%f19, %f6, %f27, %f59;
	// Component 2: same formula on s2/d2 (%f3/%f7) -> %f18.
	mov.f32 	%f60, 0f00000000;    	// 0
	max.ftz.f32 	%f61, %f7, %f60;
	mov.f32 	%f62, 0f3f800000;    	// 1
	min.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, 0f33d6bf95;    	// 1e-007
	max.ftz.f32 	%f65, %f3, %f64;
	mov.f32 	%f66, 0f3f800000;    	// 1
	min.ftz.f32 	%f67, %f65, %f66;
	div.approx.ftz.f32 	%f68, %f63, %f67;
	mov.f32 	%f69, 0f00000000;    	// 0
	max.ftz.f32 	%f70, %f68, %f69;
	mov.f32 	%f71, 0f3f800000;    	// 1
	min.ftz.f32 	%f72, %f70, %f71;
	mul.ftz.f32 	%f73, %f8, %f72;
	fma.rn.ftz.f32 	%f74, %f3, %f22, %f73;
	mul.ftz.f32 	%f75, %f24, %f74;
	fma.rn.ftz.f32 	%f18, %f7, %f27, %f75;
$Lt_115_30466:
	// Store {%f20,%f19,%f18,%f14} at pixel index x + inDestPitch*y, in the
	// same format that was selected for the loads (%p2).
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Divide_Kernel_inDest];
	@!%p2 bra 	$Lt_115_31234;
	// 16-bit path: round each f32 to half (round-to-nearest) and store
	// 4 x u16.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f20;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f19;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f18;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	487	0
	bra.uni 	$Lt_115_30978;
$Lt_115_31234:
	// 32-bit path: store 4 x f32.
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f20,%f19,%f18,%f14};
$Lt_115_30978:
$Lt_115_28930:
	.loc	22	487	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Divide_Kernel:
	} // BlendMode_IR_BlendMode_Divide_Kernel

	// BlendMode_IR_BlendMode_Hue_Kernel
	//
	// One thread per pixel.  Reads one 4-component pixel from inSrc0
	// (s = %f1..%f4) and one from inSrc1 (d = %f5..%f8), builds a new
	// 3-component colour from them, and composites it into inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0, inSrc1, inDest  64-bit global addresses of the pixel buffers.
	//   inSrcPitch0/1, inDestPitch
	//                           row pitch in pixels (index = x + pitch*y,
	//                           scaled by 8 or 16 bytes afterwards).
	//   inDeviceFormat          0  -> 4 x 16-bit half floats per pixel
	//                           !0 -> 4 x f32 per pixel
	//   inWidth, inHeight       threads with x >= inWidth or y >= inHeight
	//                           exit before any global memory access.
	//   inAlphaGain             multiplies the 4th component of the Src0 pixel.
	//
	// Stages:
	//   1. sat = saturate(max(d0,d1,d2) - min(d0,d1,d2)).
	//      Rescale s[0..2] to (%f22,%f23,%f27): largest component -> sat,
	//      smallest -> 0, middle -> sat*(mid-min)/(max-min); all three are
	//      0 when max is not strictly greater than min.
	//   2. Add the same offset to all three so that the weighted sum with
	//      the kRGB32f_To_601YPbPr coefficients matches that of d[0..2].
	//   3. Pull the result toward its weighted sum if any component is < 0
	//      or > 1.
	//   4. Composite with the same weights as the other blend kernels.
	// NOTE(review): this matches the shape of the SetSat / SetLum / ClipColor
	// steps of the standard non-separable "hue" blend, with Src0 as the hue
	// source and Src1 as the backdrop -- confirm against the CUDA source.
	.entry BlendMode_IR_BlendMode_Hue_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<119>;
	.reg .pred %p<22>;
	.loc	22	541	0
$LDWbegin_BlendMode_IR_BlendMode_Hue_Kernel:
	// %r8 = x = ctaid.x*ntid.x + tid.x ; %r10 = y = ctaid.y*ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed compares): set.gt yields all-ones when true, neg
	// turns that into 1, and the AND is zero when either test fails.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out of range: jump straight to exit.
	@%p1 bra 	$Lt_116_46082;
	// %p2 = (inDeviceFormat == 0); reused for both loads and for the store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = sign-extended 32-bit pixel index x + inSrcPitch0*y into Src0.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrc0];
	@!%p2 bra 	$Lt_116_38146;
	// 16-bit path: load 4 x u16 (8 bytes/pixel) and widen each half float
	// to f32, giving %f1..%f4.
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	541	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_116_37890;
$Lt_116_38146:
	// 32-bit path: load 4 x f32 (16 bytes/pixel) directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_116_37890:
	// Same load sequence for Src1 using inSrcPitch1; result in %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inSrc1];
	@!%p2 bra 	$Lt_116_38658;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	541	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_116_38402;
$Lt_116_38658:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_116_38402:
	// ---- Stage 1: rescale s[0..2] to the min/max range of d[0..2] ----
	// %f11 = max(s0,s1,s2), %f12 = min(s0,s1,s2), %p3 = (s0 < s1).
	// Outputs: %f22 (from s0), %f23 (from s1), %f27 (from s2).
	// Every taken branch recomputes the same value
	//   %f18 = saturate(max(d0,d1,d2) - min(d0,d1,d2))   ("sat" below).
	.loc	22	154	0
	setp.lt.ftz.f32 	%p3, %f1, %f2;
	max.ftz.f32 	%f9, %f1, %f2;
	selp.f32 	%f10, %f1, %f2, %p3;
	max.ftz.f32 	%f11, %f9, %f3;
	setp.lt.ftz.f32 	%p4, %f10, %f3;
	selp.f32 	%f12, %f10, %f3, %p4;
	// %p5: s2 is the minimum.
	setp.eq.ftz.f32 	%p5, %f12, %f3;
	@!%p5 bra 	$Lt_116_39170;
	// %p6: s1 is the maximum (s2 min).
	setp.eq.ftz.f32 	%p6, %f11, %f2;
	@!%p6 bra 	$Lt_116_39682;
	setp.gt.ftz.f32 	%p7, %f2, %f3;
	@!%p7 bra 	$Lt_116_40194;
	// min = s2, max = s1: %f22 = sat*(s0-s2)/(s1-s2), %f23 = sat.
	.loc	22	161	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f19, %f1, %f3;
	mul.ftz.f32 	%f20, %f18, %f19;
	sub.ftz.f32 	%f21, %f2, %f3;
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc	22	162	0
	mov.f32 	%f23, %f18;
	bra.uni 	$Lt_116_40450;
$Lt_116_40194:
	// max is not greater than min: zero both.
	.loc	22	166	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_116_40450;
$Lt_116_39682:
	// s2 min, s1 not max: s0 is treated as the maximum.
	setp.gt.ftz.f32 	%p10, %f1, %f3;
	@!%p10 bra 	$Lt_116_40706;
	// min = s2, max = s0: %f23 = sat*(s1-s2)/(s0-s2), %f22 = sat.
	.loc	22	173	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f24, %f2, %f3;
	mul.ftz.f32 	%f25, %f18, %f24;
	sub.ftz.f32 	%f26, %f1, %f3;
	div.approx.ftz.f32 	%f23, %f25, %f26;
	.loc	22	174	0
	mov.f32 	%f22, %f18;
	bra.uni 	$Lt_116_40450;
$Lt_116_40706:
	.loc	22	178	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_116_40450:
$Lt_116_39426:
	// s2 was the minimum, so its output is 0.
	mov.f32 	%f27, 0f00000000;    	// 0
	bra.uni 	$Lt_116_43010;
$Lt_116_39170:
	// s2 is not the minimum.  %p11: s1 is the minimum; %p12: s2 is the maximum.
	setp.eq.ftz.f32 	%p11, %f12, %f2;
	setp.eq.ftz.f32 	%p12, %f11, %f3;
	@!%p12 bra 	$Lt_116_41218;
	@!%p11 bra 	$Lt_116_41730;
	setp.lt.ftz.f32 	%p13, %f2, %f3;
	@!%p13 bra 	$Lt_116_42242;
	// min = s1, max = s2: %f22 = sat*(s0-s1)/(s2-s1), %f27 = sat.
	.loc	22	191	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f28, %f1, %f2;
	mul.ftz.f32 	%f29, %f18, %f28;
	sub.ftz.f32 	%f30, %f3, %f2;
	div.approx.ftz.f32 	%f22, %f29, %f30;
	.loc	22	192	0
	mov.f32 	%f27, %f18;
	bra.uni 	$Lt_116_41986;
$Lt_116_42242:
	.loc	22	196	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f27, 0f00000000;    	// 0
$Lt_116_41986:
	// s1 was the minimum, so its output is 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_116_43010;
$Lt_116_41730:
	// s2 max, s1 not min: s0 is treated as the minimum.
	setp.lt.ftz.f32 	%p14, %f1, %f3;
	@!%p14 bra 	$Lt_116_42754;
	// min = s0, max = s2: %f23 = sat*(s1-s0)/(s2-s0), %f27 = sat.
	.loc	22	204	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f31, %f2, %f1;
	mul.ftz.f32 	%f32, %f18, %f31;
	sub.ftz.f32 	%f33, %f3, %f1;
	div.approx.ftz.f32 	%f23, %f32, %f33;
	.loc	22	205	0
	mov.f32 	%f27, %f18;
	bra.uni 	$Lt_116_42498;
$Lt_116_42754:
	.loc	22	209	0
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_116_42498:
	// s0 was the minimum, so its output is 0.
	.loc	22	211	0
	mov.f32 	%f22, 0f00000000;    	// 0
	bra.uni 	$Lt_116_43010;
$Lt_116_41218:
	// s2 is neither the minimum nor the maximum (it is the middle value).
	@!%p11 bra 	$Lt_116_43266;
	setp.gt.ftz.f32 	%p15, %f1, %f2;
	@!%p15 bra 	$Lt_116_43778;
	// min = s1, max = s0: %f27 = sat*(s2-s1)/(s0-s1), %f22 = sat.
	.loc	22	220	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f34, %f3, %f2;
	mul.ftz.f32 	%f35, %f18, %f34;
	sub.ftz.f32 	%f36, %f1, %f2;
	div.approx.ftz.f32 	%f27, %f35, %f36;
	.loc	22	221	0
	mov.f32 	%f22, %f18;
	bra.uni 	$Lt_116_43522;
$Lt_116_43778:
	.loc	22	225	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f27, 0f00000000;    	// 0
$Lt_116_43522:
	// s1 was the minimum, so its output is 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_116_43010;
$Lt_116_43266:
	// Remaining case: s0 treated as min and s1 as max, guarded by
	// %p3 = (s0 < s1).
	@!%p3 bra 	$Lt_116_44290;
	// min = s0, max = s1: %f27 = sat*(s2-s0)/(s1-s0), %f23 = sat.
	.loc	22	233	0
	max.ftz.f32 	%f13, %f5, %f6;
	setp.lt.ftz.f32 	%p8, %f5, %f6;
	max.ftz.f32 	%f14, %f13, %f7;
	selp.f32 	%f15, %f5, %f6, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f7;
	selp.f32 	%f16, %f15, %f7, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	sub.ftz.f32 	%f37, %f3, %f1;
	mul.ftz.f32 	%f38, %f18, %f37;
	sub.ftz.f32 	%f39, %f2, %f1;
	div.approx.ftz.f32 	%f27, %f38, %f39;
	.loc	22	234	0
	mov.f32 	%f23, %f18;
	bra.uni 	$Lt_116_44034;
$Lt_116_44290:
	.loc	22	238	0
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_116_44034:
	// s0 was the minimum, so its output is 0.
	.loc	22	240	0
	mov.f32 	%f22, 0f00000000;    	// 0
$Lt_116_43010:
$Lt_116_40962:
$Lt_116_38914:
	// ---- Stage 2: match the weighted sum of d[0..2] ----
	// Coefficients from constant memory: k0 = [+0] (%f43), k1 = [+4] (%f40),
	// k2 = [+8] (%f46).  Component 0 is weighted by k2 and component 2 by k0.
	// NOTE(review): that pairing suggests a B,G,R component order with
	// R,G,B-ordered luma weights -- confirm against the table definition.
	//   %f49 = saturate(k2*d0 + k0*d2 + k1*d1)
	//   %f50 = saturate(k2*%f22 + k0*%f27 + k1*%f23)
	//   %f51 = %f49 - %f50, added to all three rescaled components:
	//   %f52/%f54/%f56 (working copies %f53/%f55/%f57).
	.loc	22	113	0
	ld.const.f32 	%f40, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f41, %f6, %f40;
	mul.ftz.f32 	%f42, %f40, %f23;
	ld.const.f32 	%f43, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f44, %f43, %f7, %f41;
	fma.rn.ftz.f32 	%f45, %f43, %f27, %f42;
	ld.const.f32 	%f46, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f47, %f46, %f5, %f44;
	fma.rn.ftz.f32 	%f48, %f46, %f22, %f45;
	cvt.ftz.sat.f32.f32 	%f49, %f47;
	cvt.ftz.sat.f32.f32 	%f50, %f48;
	sub.ftz.f32 	%f51, %f49, %f50;
	add.ftz.f32 	%f52, %f51, %f22;
	mov.f32 	%f53, %f52;
	add.ftz.f32 	%f54, %f51, %f23;
	mov.f32 	%f55, %f54;
	add.ftz.f32 	%f56, %f51, %f27;
	mov.f32 	%f57, %f56;
	// %f61 = L = saturate(weighted sum of the shifted colour).
	.loc	22	50	0
	mul.ftz.f32 	%f58, %f54, %f40;
	fma.rn.ftz.f32 	%f59, %f43, %f56, %f58;
	fma.rn.ftz.f32 	%f60, %f46, %f52, %f59;
	cvt.ftz.sat.f32.f32 	%f61, %f60;
	// ---- Stage 3a: lower clip ----
	// %f63 = n = min(%f52,%f54,%f56).  If n < 0, each component becomes
	//   c = L + L*(c - L)/(L - n).
	.loc	22	116	0
	setp.gt.ftz.f32 	%p16, %f54, %f52;
	selp.f32 	%f62, %f52, %f54, %p16;
	setp.lt.ftz.f32 	%p17, %f62, %f56;
	selp.f32 	%f63, %f62, %f56, %p17;
	mov.f32 	%f64, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p18, %f63, %f64;
	@!%p18 bra 	$Lt_116_44546;
	.loc	22	119	0
	sub.ftz.f32 	%f65, %f61, %f63;
	sub.ftz.f32 	%f66, %f56, %f61;
	mul.ftz.f32 	%f67, %f61, %f66;
	div.approx.ftz.f32 	%f68, %f67, %f65;
	add.ftz.f32 	%f57, %f61, %f68;
	.loc	22	120	0
	sub.ftz.f32 	%f69, %f54, %f61;
	mul.ftz.f32 	%f70, %f61, %f69;
	div.approx.ftz.f32 	%f71, %f70, %f65;
	add.ftz.f32 	%f55, %f61, %f71;
	.loc	22	121	0
	sub.ftz.f32 	%f72, %f52, %f61;
	mul.ftz.f32 	%f73, %f61, %f72;
	div.approx.ftz.f32 	%f74, %f73, %f65;
	add.ftz.f32 	%f53, %f61, %f74;
$Lt_116_44546:
	// ---- Stage 3b: upper clip ----
	// %f76 = m = max(%f52,%f54,%f56), taken from the values BEFORE the lower
	// clip.  If m > 1, each working component (possibly already adjusted
	// above) becomes  c = L + (1 - L)*(c - L)/(m - L).
	max.ftz.f32 	%f75, %f54, %f52;
	max.ftz.f32 	%f76, %f75, %f56;
	mov.f32 	%f77, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p19, %f76, %f77;
	@!%p19 bra 	$Lt_116_45058;
	.loc	27	529	0
	mov.f32 	%f78, 0f3f800000;    	// 1
	sub.ftz.f32 	%f79, %f78, %f61;
	sub.ftz.f32 	%f80, %f76, %f61;
	sub.ftz.f32 	%f81, %f57, %f61;
	mul.ftz.f32 	%f82, %f79, %f81;
	div.approx.ftz.f32 	%f83, %f82, %f80;
	.loc	22	125	0
	add.ftz.f32 	%f57, %f83, %f61;
	.loc	27	529	0
	sub.ftz.f32 	%f84, %f55, %f61;
	mul.ftz.f32 	%f85, %f79, %f84;
	div.approx.ftz.f32 	%f86, %f85, %f80;
	.loc	22	126	0
	add.ftz.f32 	%f55, %f86, %f61;
	.loc	27	529	0
	sub.ftz.f32 	%f87, %f53, %f61;
	mul.ftz.f32 	%f88, %f79, %f87;
	div.approx.ftz.f32 	%f89, %f88, %f80;
	.loc	22	127	0
	add.ftz.f32 	%f53, %f89, %f61;
$Lt_116_45058:
	// ---- Stage 4: composite ----
	// %f91 = a  = inAlphaGain * s[3]
	// %f94 = o3 = a + d[3] - a*d[3]; %f95 carries it to the store.
	.loc	22	468	0
	ld.param.f32 	%f90, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inAlphaGain];
	mul.ftz.f32 	%f91, %f90, %f4;
	add.ftz.f32 	%f92, %f91, %f8;
	mul.ftz.f32 	%f93, %f91, %f8;
	sub.ftz.f32 	%f94, %f92, %f93;
	mov.f32 	%f95, %f94;
	// If o3 - 8e-6 <= 0 the reciprocal below is skipped and the whole
	// output pixel is forced to zero.
	mov.f32 	%f96, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f97, %f94, %f96;
	mov.f32 	%f98, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p20, %f97, %f98;
	@!%p20 bra 	$Lt_116_45826;
	mov.f32 	%f99, 0f00000000;    	// 0
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f101, 0f00000000;   	// 0
	mov.f32 	%f95, 0f00000000;    	// 0
	bra.uni 	$Lt_116_45570;
$Lt_116_45826:
	// %f103 = 1 - d[3]; %f105 = w = a * (1/o3); %f108 = 1 - w.
	// (%f107 recomputes the same product as %f105.)
	// Per component, with c the stage-3 result:
	//   t    = c*d[3] + (1 - d[3])*c
	//   o[k] = d[k]*(1 - w) + w*t
	// Unlike the Subtract/Divide kernels, both terms of t use the blended
	// colour c rather than the raw Src0 component.
	mov.f32 	%f102, 0f3f800000;   	// 1
	sub.ftz.f32 	%f103, %f102, %f8;
	rcp.approx.ftz.f32 	%f104, %f94;
	mul.ftz.f32 	%f105, %f91, %f104;
	mov.f32 	%f106, 0f3f800000;   	// 1
	mul.ftz.f32 	%f107, %f91, %f104;
	sub.ftz.f32 	%f108, %f106, %f107;
	mul.ftz.f32 	%f109, %f103, %f53;
	fma.rn.ftz.f32 	%f110, %f53, %f8, %f109;
	mul.ftz.f32 	%f111, %f105, %f110;
	fma.rn.ftz.f32 	%f101, %f5, %f108, %f111;
	mul.ftz.f32 	%f112, %f103, %f55;
	fma.rn.ftz.f32 	%f113, %f55, %f8, %f112;
	mul.ftz.f32 	%f114, %f105, %f113;
	fma.rn.ftz.f32 	%f100, %f6, %f108, %f114;
	mul.ftz.f32 	%f115, %f103, %f57;
	fma.rn.ftz.f32 	%f116, %f57, %f8, %f115;
	mul.ftz.f32 	%f117, %f105, %f116;
	fma.rn.ftz.f32 	%f99, %f7, %f108, %f117;
$Lt_116_45570:
	// Store {%f101,%f100,%f99,%f95} at pixel index x + inDestPitch*y, in the
	// same format that was selected for the loads (%p2).
	.loc	22	541	0
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Hue_Kernel_inDest];
	@!%p2 bra 	$Lt_116_46338;
	// 16-bit path: round each f32 to half (round-to-nearest) and store
	// 4 x u16.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f101;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f100;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f99;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f95;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	541	0
	bra.uni 	$Lt_116_46082;
$Lt_116_46338:
	// 32-bit path: store 4 x f32.
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f101,%f100,%f99,%f95};
$Lt_116_46082:
$Lt_116_37378:
	.loc	22	541	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Hue_Kernel:
	} // BlendMode_IR_BlendMode_Hue_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Saturation_Kernel
	//
	// One thread per pixel. The thread reads one 4-component pixel through
	// inSrc0 ("src0" below) and one through inSrc1 ("src1" below), combines
	// them and writes one 4-component pixel through inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0 / inSrc1 / inDest  64-bit global base addresses.
	//   inSrcPitch0 / inSrcPitch1 / inDestPitch
	//                             row pitch counted in pixels: the element
	//                             index is x + pitch * y and is scaled by the
	//                             pixel size (8 or 16 bytes) afterwards.
	//   inDeviceFormat            0  -> pixels are 4 x f16 (8 bytes);
	//                             !0 -> pixels are 4 x f32 (16 bytes).
	//   inWidth / inHeight        threads with x >= inWidth or y >= inHeight
	//                             (signed compare) exit without any access.
	//   inAlphaGain               multiplies the 4th component of src0.
	//
	// Colour math on components 0..2:
	//   sat0   = clamp01(max(src0.012) - min(src0.012))
	//   t      = src1 colour rescaled so that its minimum becomes 0, its
	//            maximum becomes sat0 and the middle channel keeps its
	//            relative position (all zero when max <= min)
	//   t     += clamp01(luma(src1)) - clamp01(luma(t))
	//   t      = two-step clip of t around clamp01(luma(t)) (min < 0, max > 1)
	// This has the shape SetLum(SetSat(src1, Sat(src0)), Lum(src1)) of the
	// non-separable "Saturation" blend formula.
	// NOTE(review): luma weight [kRGB32f_To_601YPbPr+0] is applied to
	// component 2, +4 to component 1 and +8 to component 0; presumably the
	// pixels are stored B,G,R,A and the table holds R,G,B luma weights --
	// confirm against the table definition and the callers.
	//
	// Alpha / output:
	//   a0 = inAlphaGain * src0.w      a1 = src1.w
	//   aOut = a0 + a1 - a0 * a1
	//   if (aOut - 8e-6 <= 0) the whole output pixel is zero, otherwise
	//   out.c = src1.c * (1 - a0/aOut) + (a0/aOut) * (t.c*a1 + (1 - a1)*t.c)
	//   out.w = aOut
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Saturation_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<119>;
	.reg .pred %p<22>;
	.loc	22	542	0
$LDWbegin_BlendMode_IR_BlendMode_Saturation_Kernel:
	// %r8  = x = ctaid.x * ntid.x + tid.x
	// %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test: (inWidth > x) & (inHeight > y), both compared as s32.
	// set.* yields an all-ones mask for true; neg turns that into 1.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Outside the image: jump straight to exit.
	@%p1 bra 	$Lt_117_46082;
	// %p2 = (inDeviceFormat == 0); it selects the f16 path for both loads
	// and for the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = sign-extended pixel index into src0: x + inSrcPitch0 * y.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrc0];
	@!%p2 bra 	$Lt_117_38146;
	.loc	21	115	0
	// f16 path: 8 bytes per pixel, widen each half to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	542	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_117_37890;
$Lt_117_38146:
	// f32 path: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_117_37890:
	// Same addressing for src1 (inSrcPitch1 / inSrc1) -> %f5..%f8.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inSrc1];
	@!%p2 bra 	$Lt_117_38658;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	542	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_117_38402;
$Lt_117_38658:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_117_38402:
	.loc	22	154	0
	// Order the src1 colour channels:
	//   %p3  = (%f5 < %f6)
	//   %f11 = max(%f5,%f6,%f7)      %f12 = min(%f5,%f6,%f7)
	// The branch tree below picks which channel is min / mid / max and
	// writes the rescaled colour to
	//   %f22 (position of %f5), %f23 (position of %f6), %f27 (position of %f7).
	// Every taken leaf recomputes
	//   %f18 = clamp01(max(%f1,%f2,%f3) - min(%f1,%f2,%f3))   (src0 chroma range)
	// and then sets  max -> %f18,  mid -> %f18*(mid-min)/(max-min),  min -> 0.
	// A leaf whose guard (max > min) fails writes 0 to all three.
	setp.lt.ftz.f32 	%p3, %f5, %f6;
	max.ftz.f32 	%f9, %f5, %f6;
	selp.f32 	%f10, %f5, %f6, %p3;
	max.ftz.f32 	%f11, %f9, %f7;
	setp.lt.ftz.f32 	%p4, %f10, %f7;
	selp.f32 	%f12, %f10, %f7, %p4;
	// %p5: the minimum equals %f7.
	setp.eq.ftz.f32 	%p5, %f12, %f7;
	@!%p5 bra 	$Lt_117_39170;
	// %p6: the maximum equals %f6.
	setp.eq.ftz.f32 	%p6, %f11, %f6;
	@!%p6 bra 	$Lt_117_39682;
	// Case min=%f7, max=%f6, mid=%f5; guard max > min.
	setp.gt.ftz.f32 	%p7, %f6, %f7;
	@!%p7 bra 	$Lt_117_40194;
	.loc	22	161	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f22 = %f18 * (%f5 - %f7) / (%f6 - %f7)
	sub.ftz.f32 	%f19, %f5, %f7;
	mul.ftz.f32 	%f20, %f18, %f19;
	sub.ftz.f32 	%f21, %f6, %f7;
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc	22	162	0
	// max: %f23 = %f18
	mov.f32 	%f23, %f18;
	bra.uni 	$Lt_117_40450;
$Lt_117_40194:
	.loc	22	166	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_117_40450;
$Lt_117_39682:
	// Case min=%f7, max is not %f6 (so %f5 is treated as max), mid=%f6.
	setp.gt.ftz.f32 	%p10, %f5, %f7;
	@!%p10 bra 	$Lt_117_40706;
	.loc	22	173	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f23 = %f18 * (%f6 - %f7) / (%f5 - %f7)
	sub.ftz.f32 	%f24, %f6, %f7;
	mul.ftz.f32 	%f25, %f18, %f24;
	sub.ftz.f32 	%f26, %f5, %f7;
	div.approx.ftz.f32 	%f23, %f25, %f26;
	.loc	22	174	0
	// max: %f22 = %f18
	mov.f32 	%f22, %f18;
	bra.uni 	$Lt_117_40450;
$Lt_117_40706:
	.loc	22	178	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_117_40450:
$Lt_117_39426:
	// Common tail for "min=%f7": the minimum channel becomes 0.
	mov.f32 	%f27, 0f00000000;    	// 0
	bra.uni 	$Lt_117_43010;
$Lt_117_39170:
	// Minimum is not %f7.
	//   %p11: the minimum equals %f6      %p12: the maximum equals %f7
	setp.eq.ftz.f32 	%p11, %f12, %f6;
	setp.eq.ftz.f32 	%p12, %f11, %f7;
	@!%p12 bra 	$Lt_117_41218;
	@!%p11 bra 	$Lt_117_41730;
	// Case max=%f7, min=%f6, mid=%f5; guard min < max.
	setp.lt.ftz.f32 	%p13, %f6, %f7;
	@!%p13 bra 	$Lt_117_42242;
	.loc	22	191	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f22 = %f18 * (%f5 - %f6) / (%f7 - %f6)
	sub.ftz.f32 	%f28, %f5, %f6;
	mul.ftz.f32 	%f29, %f18, %f28;
	sub.ftz.f32 	%f30, %f7, %f6;
	div.approx.ftz.f32 	%f22, %f29, %f30;
	.loc	22	192	0
	// max: %f27 = %f18
	mov.f32 	%f27, %f18;
	bra.uni 	$Lt_117_41986;
$Lt_117_42242:
	.loc	22	196	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f27, 0f00000000;    	// 0
$Lt_117_41986:
	// min (%f6 position) becomes 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_117_43010;
$Lt_117_41730:
	// Case max=%f7, minimum is neither %f7 nor %f6 (so %f5 is treated as
	// min), mid=%f6.
	setp.lt.ftz.f32 	%p14, %f5, %f7;
	@!%p14 bra 	$Lt_117_42754;
	.loc	22	204	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f23 = %f18 * (%f6 - %f5) / (%f7 - %f5)
	sub.ftz.f32 	%f31, %f6, %f5;
	mul.ftz.f32 	%f32, %f18, %f31;
	sub.ftz.f32 	%f33, %f7, %f5;
	div.approx.ftz.f32 	%f23, %f32, %f33;
	.loc	22	205	0
	// max: %f27 = %f18
	mov.f32 	%f27, %f18;
	bra.uni 	$Lt_117_42498;
$Lt_117_42754:
	.loc	22	209	0
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_117_42498:
	.loc	22	211	0
	// min (%f5 position) becomes 0.
	mov.f32 	%f22, 0f00000000;    	// 0
	bra.uni 	$Lt_117_43010;
$Lt_117_41218:
	// %f7 matched neither the minimum nor the maximum, so it is the mid
	// channel; %p11 tells whether %f6 is the minimum.
	@!%p11 bra 	$Lt_117_43266;
	// Case min=%f6, max=%f5, mid=%f7; guard max > min.
	setp.gt.ftz.f32 	%p15, %f5, %f6;
	@!%p15 bra 	$Lt_117_43778;
	.loc	22	220	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f27 = %f18 * (%f7 - %f6) / (%f5 - %f6)
	sub.ftz.f32 	%f34, %f7, %f6;
	mul.ftz.f32 	%f35, %f18, %f34;
	sub.ftz.f32 	%f36, %f5, %f6;
	div.approx.ftz.f32 	%f27, %f35, %f36;
	.loc	22	221	0
	// max: %f22 = %f18
	mov.f32 	%f22, %f18;
	bra.uni 	$Lt_117_43522;
$Lt_117_43778:
	.loc	22	225	0
	mov.f32 	%f22, 0f00000000;    	// 0
	mov.f32 	%f27, 0f00000000;    	// 0
$Lt_117_43522:
	// min (%f6 position) becomes 0.
	mov.f32 	%f23, 0f00000000;    	// 0
	bra.uni 	$Lt_117_43010;
$Lt_117_43266:
	// Case min=%f5, max=%f6, mid=%f7; the guard reuses %p3 = (%f5 < %f6).
	@!%p3 bra 	$Lt_117_44290;
	.loc	22	233	0
	max.ftz.f32 	%f13, %f1, %f2;
	setp.lt.ftz.f32 	%p8, %f1, %f2;
	max.ftz.f32 	%f14, %f13, %f3;
	selp.f32 	%f15, %f1, %f2, %p8;
	setp.lt.ftz.f32 	%p9, %f15, %f3;
	selp.f32 	%f16, %f15, %f3, %p9;
	sub.ftz.f32 	%f17, %f14, %f16;
	cvt.ftz.sat.f32.f32 	%f18, %f17;
	// mid: %f27 = %f18 * (%f7 - %f5) / (%f6 - %f5)
	sub.ftz.f32 	%f37, %f7, %f5;
	mul.ftz.f32 	%f38, %f18, %f37;
	sub.ftz.f32 	%f39, %f6, %f5;
	div.approx.ftz.f32 	%f27, %f38, %f39;
	.loc	22	234	0
	// max: %f23 = %f18
	mov.f32 	%f23, %f18;
	bra.uni 	$Lt_117_44034;
$Lt_117_44290:
	.loc	22	238	0
	mov.f32 	%f27, 0f00000000;    	// 0
	mov.f32 	%f23, 0f00000000;    	// 0
$Lt_117_44034:
	.loc	22	240	0
	// min (%f5 position) becomes 0.
	mov.f32 	%f22, 0f00000000;    	// 0
$Lt_117_43010:
$Lt_117_40962:
$Lt_117_38914:
	.loc	22	113	0
	// Luma transfer. With k0/k4/k8 = kRGB32f_To_601YPbPr[+0/+4/+8]:
	//   %f47 = k8*%f5  + k4*%f6  + k0*%f7     (luma of src1)
	//   %f48 = k8*%f22 + k4*%f23 + k0*%f27    (luma of the rescaled colour)
	//   %f51 = clamp01(%f47) - clamp01(%f48)
	// %f51 is then added to every rescaled channel.
	ld.const.f32 	%f40, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f41, %f6, %f40;
	mul.ftz.f32 	%f42, %f40, %f23;
	ld.const.f32 	%f43, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f44, %f43, %f7, %f41;
	fma.rn.ftz.f32 	%f45, %f43, %f27, %f42;
	ld.const.f32 	%f46, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f47, %f46, %f5, %f44;
	fma.rn.ftz.f32 	%f48, %f46, %f22, %f45;
	cvt.ftz.sat.f32.f32 	%f49, %f47;
	cvt.ftz.sat.f32.f32 	%f50, %f48;
	sub.ftz.f32 	%f51, %f49, %f50;
	// Shifted channels: %f52/%f54/%f56 keep the unclipped values used by the
	// min/max tests; %f53/%f55/%f57 are working copies the clip steps rewrite.
	add.ftz.f32 	%f52, %f51, %f22;
	mov.f32 	%f53, %f52;
	add.ftz.f32 	%f54, %f51, %f23;
	mov.f32 	%f55, %f54;
	add.ftz.f32 	%f56, %f51, %f27;
	mov.f32 	%f57, %f56;
	.loc	22	50	0
	// %f61 = clamp01(luma of the shifted colour); pivot of both clip steps.
	mul.ftz.f32 	%f58, %f54, %f40;
	fma.rn.ftz.f32 	%f59, %f43, %f56, %f58;
	fma.rn.ftz.f32 	%f60, %f46, %f52, %f59;
	cvt.ftz.sat.f32.f32 	%f61, %f60;
	.loc	22	116	0
	// %f63 = min(%f52,%f54,%f56); clip step 1 runs only when it is negative.
	setp.gt.ftz.f32 	%p16, %f54, %f52;
	selp.f32 	%f62, %f52, %f54, %p16;
	setp.lt.ftz.f32 	%p17, %f62, %f56;
	selp.f32 	%f63, %f62, %f56, %p17;
	mov.f32 	%f64, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p18, %f63, %f64;
	@!%p18 bra 	$Lt_117_44546;
	.loc	22	119	0
	// Clip step 1:  c = L + L*(c - L)/(L - min)   with L = %f61, min = %f63.
	sub.ftz.f32 	%f65, %f61, %f63;
	sub.ftz.f32 	%f66, %f56, %f61;
	mul.ftz.f32 	%f67, %f61, %f66;
	div.approx.ftz.f32 	%f68, %f67, %f65;
	add.ftz.f32 	%f57, %f61, %f68;
	.loc	22	120	0
	sub.ftz.f32 	%f69, %f54, %f61;
	mul.ftz.f32 	%f70, %f61, %f69;
	div.approx.ftz.f32 	%f71, %f70, %f65;
	add.ftz.f32 	%f55, %f61, %f71;
	.loc	22	121	0
	sub.ftz.f32 	%f72, %f52, %f61;
	mul.ftz.f32 	%f73, %f61, %f72;
	div.approx.ftz.f32 	%f74, %f73, %f65;
	add.ftz.f32 	%f53, %f61, %f74;
$Lt_117_44546:
	// %f76 = max of the *unclipped* channels; clip step 2 runs when it is > 1.
	max.ftz.f32 	%f75, %f54, %f52;
	max.ftz.f32 	%f76, %f75, %f56;
	mov.f32 	%f77, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p19, %f76, %f77;
	@!%p19 bra 	$Lt_117_45058;
	.loc	27	529	0
	// Clip step 2:  c = L + (1 - L)*(c - L)/(max - L), applied to the working
	// copies (which may already have been rewritten by step 1).
	mov.f32 	%f78, 0f3f800000;    	// 1
	sub.ftz.f32 	%f79, %f78, %f61;
	sub.ftz.f32 	%f80, %f76, %f61;
	sub.ftz.f32 	%f81, %f57, %f61;
	mul.ftz.f32 	%f82, %f79, %f81;
	div.approx.ftz.f32 	%f83, %f82, %f80;
	.loc	22	125	0
	add.ftz.f32 	%f57, %f83, %f61;
	.loc	27	529	0
	sub.ftz.f32 	%f84, %f55, %f61;
	mul.ftz.f32 	%f85, %f79, %f84;
	div.approx.ftz.f32 	%f86, %f85, %f80;
	.loc	22	126	0
	add.ftz.f32 	%f55, %f86, %f61;
	.loc	27	529	0
	sub.ftz.f32 	%f87, %f53, %f61;
	mul.ftz.f32 	%f88, %f79, %f87;
	div.approx.ftz.f32 	%f89, %f88, %f80;
	.loc	22	127	0
	add.ftz.f32 	%f53, %f89, %f61;
$Lt_117_45058:
	.loc	22	468	0
	// Alpha:  %f91 = a0 = inAlphaGain * src0.w,   a1 = %f8 = src1.w
	//         %f94 = a0 + a1 - a0*a1              (also kept in %f95 = out.w)
	ld.param.f32 	%f90, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inAlphaGain];
	mul.ftz.f32 	%f91, %f90, %f4;
	add.ftz.f32 	%f92, %f91, %f8;
	mul.ftz.f32 	%f93, %f91, %f8;
	sub.ftz.f32 	%f94, %f92, %f93;
	mov.f32 	%f95, %f94;
	// Treat a combined alpha of at most 8e-6 as fully transparent.
	mov.f32 	%f96, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f97, %f94, %f96;
	mov.f32 	%f98, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p20, %f97, %f98;
	@!%p20 bra 	$Lt_117_45826;
	// Transparent result: all four output components are zero.
	mov.f32 	%f99, 0f00000000;    	// 0
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f101, 0f00000000;   	// 0
	mov.f32 	%f95, 0f00000000;    	// 0
	bra.uni 	$Lt_117_45570;
$Lt_117_45826:
	// %f103 = 1 - a1,  %f105 = a0 / aOut,  %f108 = 1 - a0 / aOut
	// (%f107 repeats the %f105 product; the compiler emitted it twice).
	mov.f32 	%f102, 0f3f800000;   	// 1
	sub.ftz.f32 	%f103, %f102, %f8;
	rcp.approx.ftz.f32 	%f104, %f94;
	mul.ftz.f32 	%f105, %f91, %f104;
	mov.f32 	%f106, 0f3f800000;   	// 1
	mul.ftz.f32 	%f107, %f91, %f104;
	sub.ftz.f32 	%f108, %f106, %f107;
	// Per channel: out = src1.c * %f108 + %f105 * (t.c*a1 + (1 - a1)*t.c)
	// where t.c is the clipped blend colour (%f53 / %f55 / %f57).
	mul.ftz.f32 	%f109, %f103, %f53;
	fma.rn.ftz.f32 	%f110, %f53, %f8, %f109;
	mul.ftz.f32 	%f111, %f105, %f110;
	fma.rn.ftz.f32 	%f101, %f5, %f108, %f111;
	mul.ftz.f32 	%f112, %f103, %f55;
	fma.rn.ftz.f32 	%f113, %f55, %f8, %f112;
	mul.ftz.f32 	%f114, %f105, %f113;
	fma.rn.ftz.f32 	%f100, %f6, %f108, %f114;
	mul.ftz.f32 	%f115, %f103, %f57;
	fma.rn.ftz.f32 	%f116, %f57, %f8, %f115;
	mul.ftz.f32 	%f117, %f105, %f116;
	fma.rn.ftz.f32 	%f99, %f7, %f108, %f117;
$Lt_117_45570:
	.loc	22	542	0
	// Destination pixel index: x + inDestPitch * y; format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Saturation_Kernel_inDest];
	@!%p2 bra 	$Lt_117_46338;
	.loc	21	126	0
	// f16 store: round each component to nearest half, write 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f101;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f100;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f99;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f95;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	542	0
	bra.uni 	$Lt_117_46082;
$Lt_117_46338:
	.loc	21	126	0
	// f32 store: write 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f101,%f100,%f99,%f95};
$Lt_117_46082:
$Lt_117_37378:
	.loc	22	542	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Saturation_Kernel:
	} // BlendMode_IR_BlendMode_Saturation_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Color_Kernel
	//
	// One thread per pixel. Reads one 4-component pixel through inSrc0
	// ("src0") and one through inSrc1 ("src1"), writes one through inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0 / inSrc1 / inDest  64-bit global base addresses.
	//   inSrcPitch0 / inSrcPitch1 / inDestPitch
	//                             row pitch counted in pixels (index is
	//                             x + pitch * y, scaled by 8 or 16 bytes).
	//   inDeviceFormat            0 -> 4 x f16 pixels, otherwise 4 x f32.
	//   inWidth / inHeight        threads outside [0,inWidth) x [0,inHeight)
	//                             exit without touching memory.
	//   inAlphaGain               multiplies the 4th component of src0.
	//
	// Colour math on components 0..2:
	//   t = src0 colour + (clamp01(luma(src1)) - clamp01(luma(src0)))
	//   t = two-step clip of t around clamp01(luma(t)) (min < 0, max > 1)
	// i.e. the src0 colour carrying the src1 luma, the shape
	// SetLum(src0, Lum(src1)) of the non-separable "Color" blend formula.
	// NOTE(review): luma weight [kRGB32f_To_601YPbPr+0] multiplies component
	// 2 and +8 multiplies component 0; presumably B,G,R,A storage -- confirm.
	//
	// Alpha / output:
	//   a0 = inAlphaGain * src0.w,  a1 = src1.w,  aOut = a0 + a1 - a0*a1
	//   if (aOut - 8e-6 <= 0) the output pixel is all zero, otherwise
	//   out.c = src1.c * (1 - a0/aOut) + (a0/aOut) * (t.c*a1 + (1 - a1)*t.c)
	//   out.w = aOut
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Color_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<88>;
	.reg .pred %p<9>;
	.loc	22	543	0
$LDWbegin_BlendMode_IR_BlendMode_Color_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x,  %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed): continue only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_118_23810;
	// %p2 = (inDeviceFormat == 0) selects the f16 path for loads and store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// src0 pixel index: x + inSrcPitch0 * y.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrc0];
	@!%p2 bra 	$Lt_118_24578;
	.loc	21	115	0
	// f16 path: 8-byte pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	543	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_118_24322;
$Lt_118_24578:
	// f32 path: 16-byte pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_118_24322:
	// src1 pixel (inSrcPitch1 / inSrc1) -> %f5..%f8, same two paths.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inSrc1];
	@!%p2 bra 	$Lt_118_25090;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	543	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_118_24834;
$Lt_118_25090:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_118_24834:
	.loc	22	113	0
	// Luma of both inputs, k0/k4/k8 = kRGB32f_To_601YPbPr[+0/+4/+8]:
	//   %f16 = k8*%f5 + k4*%f6 + k0*%f7     (src1)
	//   %f17 = k8*%f1 + k4*%f2 + k0*%f3     (src0)
	//   %f20 = clamp01(%f16) - clamp01(%f17)
	ld.const.f32 	%f9, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f10, %f6, %f9;
	mul.ftz.f32 	%f11, %f2, %f9;
	ld.const.f32 	%f12, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f13, %f12, %f7, %f10;
	fma.rn.ftz.f32 	%f14, %f12, %f3, %f11;
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f16, %f15, %f5, %f13;
	fma.rn.ftz.f32 	%f17, %f15, %f1, %f14;
	cvt.ftz.sat.f32.f32 	%f18, %f16;
	cvt.ftz.sat.f32.f32 	%f19, %f17;
	sub.ftz.f32 	%f20, %f18, %f19;
	// Shift the src0 colour by the luma difference. %f21/%f23/%f25 keep the
	// unclipped values; %f22/%f24/%f26 are working copies for the clip steps.
	add.ftz.f32 	%f21, %f20, %f1;
	mov.f32 	%f22, %f21;
	add.ftz.f32 	%f23, %f20, %f2;
	mov.f32 	%f24, %f23;
	add.ftz.f32 	%f25, %f20, %f3;
	mov.f32 	%f26, %f25;
	.loc	22	50	0
	// %f30 = clamp01(luma of the shifted colour); pivot of both clip steps.
	mul.ftz.f32 	%f27, %f23, %f9;
	fma.rn.ftz.f32 	%f28, %f12, %f25, %f27;
	fma.rn.ftz.f32 	%f29, %f15, %f21, %f28;
	cvt.ftz.sat.f32.f32 	%f30, %f29;
	.loc	22	116	0
	// %f32 = min(%f21,%f23,%f25); clip step 1 runs only when it is negative.
	setp.gt.ftz.f32 	%p3, %f23, %f21;
	selp.f32 	%f31, %f21, %f23, %p3;
	setp.lt.ftz.f32 	%p4, %f31, %f25;
	selp.f32 	%f32, %f31, %f25, %p4;
	mov.f32 	%f33, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f32, %f33;
	@!%p5 bra 	$Lt_118_25346;
	.loc	22	119	0
	// Clip step 1:  c = L + L*(c - L)/(L - min)   with L = %f30, min = %f32.
	sub.ftz.f32 	%f34, %f30, %f32;
	sub.ftz.f32 	%f35, %f25, %f30;
	mul.ftz.f32 	%f36, %f30, %f35;
	div.approx.ftz.f32 	%f37, %f36, %f34;
	add.ftz.f32 	%f26, %f30, %f37;
	.loc	22	120	0
	sub.ftz.f32 	%f38, %f23, %f30;
	mul.ftz.f32 	%f39, %f30, %f38;
	div.approx.ftz.f32 	%f40, %f39, %f34;
	add.ftz.f32 	%f24, %f30, %f40;
	.loc	22	121	0
	sub.ftz.f32 	%f41, %f21, %f30;
	mul.ftz.f32 	%f42, %f30, %f41;
	div.approx.ftz.f32 	%f43, %f42, %f34;
	add.ftz.f32 	%f22, %f30, %f43;
$Lt_118_25346:
	// %f45 = max of the unclipped channels; clip step 2 runs when it is > 1.
	max.ftz.f32 	%f44, %f23, %f21;
	max.ftz.f32 	%f45, %f44, %f25;
	mov.f32 	%f46, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p6, %f45, %f46;
	@!%p6 bra 	$Lt_118_25858;
	.loc	27	529	0
	// Clip step 2:  c = L + (1 - L)*(c - L)/(max - L), applied to the working
	// copies (possibly already rewritten by step 1).
	mov.f32 	%f47, 0f3f800000;    	// 1
	sub.ftz.f32 	%f48, %f47, %f30;
	sub.ftz.f32 	%f49, %f45, %f30;
	sub.ftz.f32 	%f50, %f26, %f30;
	mul.ftz.f32 	%f51, %f48, %f50;
	div.approx.ftz.f32 	%f52, %f51, %f49;
	.loc	22	125	0
	add.ftz.f32 	%f26, %f52, %f30;
	.loc	27	529	0
	sub.ftz.f32 	%f53, %f24, %f30;
	mul.ftz.f32 	%f54, %f48, %f53;
	div.approx.ftz.f32 	%f55, %f54, %f49;
	.loc	22	126	0
	add.ftz.f32 	%f24, %f55, %f30;
	.loc	27	529	0
	sub.ftz.f32 	%f56, %f22, %f30;
	mul.ftz.f32 	%f57, %f48, %f56;
	div.approx.ftz.f32 	%f58, %f57, %f49;
	.loc	22	127	0
	add.ftz.f32 	%f22, %f58, %f30;
$Lt_118_25858:
	.loc	22	468	0
	// Alpha:  %f60 = a0 = inAlphaGain * src0.w,   a1 = %f8 = src1.w
	//         %f63 = a0 + a1 - a0*a1              (also kept in %f64 = out.w)
	ld.param.f32 	%f59, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inAlphaGain];
	mul.ftz.f32 	%f60, %f59, %f4;
	add.ftz.f32 	%f61, %f60, %f8;
	mul.ftz.f32 	%f62, %f60, %f8;
	sub.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, %f63;
	// Treat a combined alpha of at most 8e-6 as fully transparent.
	mov.f32 	%f65, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f66, %f63, %f65;
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f66, %f67;
	@!%p7 bra 	$Lt_118_26626;
	// Transparent result: all four output components are zero.
	mov.f32 	%f68, 0f00000000;    	// 0
	mov.f32 	%f69, 0f00000000;    	// 0
	mov.f32 	%f70, 0f00000000;    	// 0
	mov.f32 	%f64, 0f00000000;    	// 0
	bra.uni 	$Lt_118_26370;
$Lt_118_26626:
	// %f72 = 1 - a1,  %f74 = a0 / aOut,  %f77 = 1 - a0 / aOut
	// (%f76 repeats the %f74 product; the compiler emitted it twice).
	mov.f32 	%f71, 0f3f800000;    	// 1
	sub.ftz.f32 	%f72, %f71, %f8;
	rcp.approx.ftz.f32 	%f73, %f63;
	mul.ftz.f32 	%f74, %f60, %f73;
	mov.f32 	%f75, 0f3f800000;    	// 1
	mul.ftz.f32 	%f76, %f60, %f73;
	sub.ftz.f32 	%f77, %f75, %f76;
	// Per channel: out = src1.c * %f77 + %f74 * (t.c*a1 + (1 - a1)*t.c)
	// where t.c is the clipped blend colour (%f22 / %f24 / %f26).
	mul.ftz.f32 	%f78, %f72, %f22;
	fma.rn.ftz.f32 	%f79, %f22, %f8, %f78;
	mul.ftz.f32 	%f80, %f74, %f79;
	fma.rn.ftz.f32 	%f70, %f5, %f77, %f80;
	mul.ftz.f32 	%f81, %f72, %f24;
	fma.rn.ftz.f32 	%f82, %f24, %f8, %f81;
	mul.ftz.f32 	%f83, %f74, %f82;
	fma.rn.ftz.f32 	%f69, %f6, %f77, %f83;
	mul.ftz.f32 	%f84, %f72, %f26;
	fma.rn.ftz.f32 	%f85, %f26, %f8, %f84;
	mul.ftz.f32 	%f86, %f74, %f85;
	fma.rn.ftz.f32 	%f68, %f7, %f77, %f86;
$Lt_118_26370:
	.loc	22	543	0
	// Destination pixel index: x + inDestPitch * y; format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Color_Kernel_inDest];
	@!%p2 bra 	$Lt_118_27138;
	.loc	21	126	0
	// f16 store: round each component to nearest half, write 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f70;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f69;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f68;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f64;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	543	0
	bra.uni 	$Lt_118_26882;
$Lt_118_27138:
	.loc	21	126	0
	// f32 store: write 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f70,%f69,%f68,%f64};
$Lt_118_26882:
$Lt_118_23810:
	.loc	22	543	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Color_Kernel:
	} // BlendMode_IR_BlendMode_Color_Kernel

	// ---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_Luminosity_Kernel
	//
	// One thread per pixel. Reads one 4-component pixel through inSrc0
	// ("src0") and one through inSrc1 ("src1"), writes one through inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0 / inSrc1 / inDest  64-bit global base addresses.
	//   inSrcPitch0 / inSrcPitch1 / inDestPitch
	//                             row pitch counted in pixels (index is
	//                             x + pitch * y, scaled by 8 or 16 bytes).
	//   inDeviceFormat            0 -> 4 x f16 pixels, otherwise 4 x f32.
	//   inWidth / inHeight        threads outside [0,inWidth) x [0,inHeight)
	//                             exit without touching memory.
	//   inAlphaGain               multiplies the 4th component of src0.
	//
	// Colour math on components 0..2:
	//   t = src1 colour + (clamp01(luma(src0)) - clamp01(luma(src1)))
	//   t = two-step clip of t around clamp01(luma(t)) (min < 0, max > 1)
	// i.e. the src1 colour carrying the src0 luma, the shape
	// SetLum(src1, Lum(src0)) of the non-separable "Luminosity" blend
	// formula. Apart from the swapped roles of src0 and src1 in this step
	// the instruction stream matches the Color kernel.
	// NOTE(review): luma weight [kRGB32f_To_601YPbPr+0] multiplies component
	// 2 and +8 multiplies component 0; presumably B,G,R,A storage -- confirm.
	//
	// Alpha / output:
	//   a0 = inAlphaGain * src0.w,  a1 = src1.w,  aOut = a0 + a1 - a0*a1
	//   if (aOut - 8e-6 <= 0) the output pixel is all zero, otherwise
	//   out.c = src1.c * (1 - a0/aOut) + (a0/aOut) * (t.c*a1 + (1 - a1)*t.c)
	//   out.w = aOut
	// ---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_Luminosity_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inAlphaGain)
	{
	.reg .u32 %r<43>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<88>;
	.reg .pred %p<9>;
	.loc	22	544	0
$LDWbegin_BlendMode_IR_BlendMode_Luminosity_Kernel:
	// %r8 = x = ctaid.x * ntid.x + tid.x,  %r10 = y = ctaid.y * ntid.y + tid.y
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds test (signed): continue only if inWidth > x and inHeight > y.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_119_23810;
	// %p2 = (inDeviceFormat == 0) selects the f16 path for loads and store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// src0 pixel index: x + inSrcPitch0 * y.
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrc0];
	@!%p2 bra 	$Lt_119_24578;
	.loc	21	115	0
	// f16 path: 8-byte pixel, each half widened to f32 -> %f1..%f4.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	544	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_119_24322;
$Lt_119_24578:
	// f32 path: 16-byte pixel loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_119_24322:
	// src1 pixel (inSrcPitch1 / inSrc1) -> %f5..%f8, same two paths.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inSrc1];
	@!%p2 bra 	$Lt_119_25090;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	544	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_119_24834;
$Lt_119_25090:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_119_24834:
	.loc	22	113	0
	// Luma of both inputs, k0/k4/k8 = kRGB32f_To_601YPbPr[+0/+4/+8]:
	//   %f16 = k8*%f1 + k4*%f2 + k0*%f3     (src0)
	//   %f17 = k8*%f5 + k4*%f6 + k0*%f7     (src1)
	//   %f20 = clamp01(%f16) - clamp01(%f17)
	ld.const.f32 	%f9, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f10, %f2, %f9;
	mul.ftz.f32 	%f11, %f6, %f9;
	ld.const.f32 	%f12, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f13, %f12, %f3, %f10;
	fma.rn.ftz.f32 	%f14, %f12, %f7, %f11;
	ld.const.f32 	%f15, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f16, %f15, %f1, %f13;
	fma.rn.ftz.f32 	%f17, %f15, %f5, %f14;
	cvt.ftz.sat.f32.f32 	%f18, %f16;
	cvt.ftz.sat.f32.f32 	%f19, %f17;
	sub.ftz.f32 	%f20, %f18, %f19;
	// Shift the src1 colour by the luma difference. %f21/%f23/%f25 keep the
	// unclipped values; %f22/%f24/%f26 are working copies for the clip steps.
	add.ftz.f32 	%f21, %f20, %f5;
	mov.f32 	%f22, %f21;
	add.ftz.f32 	%f23, %f20, %f6;
	mov.f32 	%f24, %f23;
	add.ftz.f32 	%f25, %f20, %f7;
	mov.f32 	%f26, %f25;
	.loc	22	50	0
	// %f30 = clamp01(luma of the shifted colour); pivot of both clip steps.
	mul.ftz.f32 	%f27, %f23, %f9;
	fma.rn.ftz.f32 	%f28, %f12, %f25, %f27;
	fma.rn.ftz.f32 	%f29, %f15, %f21, %f28;
	cvt.ftz.sat.f32.f32 	%f30, %f29;
	.loc	22	116	0
	// %f32 = min(%f21,%f23,%f25); clip step 1 runs only when it is negative.
	setp.gt.ftz.f32 	%p3, %f23, %f21;
	selp.f32 	%f31, %f21, %f23, %p3;
	setp.lt.ftz.f32 	%p4, %f31, %f25;
	selp.f32 	%f32, %f31, %f25, %p4;
	mov.f32 	%f33, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f32, %f33;
	@!%p5 bra 	$Lt_119_25346;
	.loc	22	119	0
	// Clip step 1:  c = L + L*(c - L)/(L - min)   with L = %f30, min = %f32.
	sub.ftz.f32 	%f34, %f30, %f32;
	sub.ftz.f32 	%f35, %f25, %f30;
	mul.ftz.f32 	%f36, %f30, %f35;
	div.approx.ftz.f32 	%f37, %f36, %f34;
	add.ftz.f32 	%f26, %f30, %f37;
	.loc	22	120	0
	sub.ftz.f32 	%f38, %f23, %f30;
	mul.ftz.f32 	%f39, %f30, %f38;
	div.approx.ftz.f32 	%f40, %f39, %f34;
	add.ftz.f32 	%f24, %f30, %f40;
	.loc	22	121	0
	sub.ftz.f32 	%f41, %f21, %f30;
	mul.ftz.f32 	%f42, %f30, %f41;
	div.approx.ftz.f32 	%f43, %f42, %f34;
	add.ftz.f32 	%f22, %f30, %f43;
$Lt_119_25346:
	// %f45 = max of the unclipped channels; clip step 2 runs when it is > 1.
	max.ftz.f32 	%f44, %f23, %f21;
	max.ftz.f32 	%f45, %f44, %f25;
	mov.f32 	%f46, 0f3f800000;    	// 1
	setp.gt.ftz.f32 	%p6, %f45, %f46;
	@!%p6 bra 	$Lt_119_25858;
	.loc	27	529	0
	// Clip step 2:  c = L + (1 - L)*(c - L)/(max - L), applied to the working
	// copies (possibly already rewritten by step 1).
	mov.f32 	%f47, 0f3f800000;    	// 1
	sub.ftz.f32 	%f48, %f47, %f30;
	sub.ftz.f32 	%f49, %f45, %f30;
	sub.ftz.f32 	%f50, %f26, %f30;
	mul.ftz.f32 	%f51, %f48, %f50;
	div.approx.ftz.f32 	%f52, %f51, %f49;
	.loc	22	125	0
	add.ftz.f32 	%f26, %f52, %f30;
	.loc	27	529	0
	sub.ftz.f32 	%f53, %f24, %f30;
	mul.ftz.f32 	%f54, %f48, %f53;
	div.approx.ftz.f32 	%f55, %f54, %f49;
	.loc	22	126	0
	add.ftz.f32 	%f24, %f55, %f30;
	.loc	27	529	0
	sub.ftz.f32 	%f56, %f22, %f30;
	mul.ftz.f32 	%f57, %f48, %f56;
	div.approx.ftz.f32 	%f58, %f57, %f49;
	.loc	22	127	0
	add.ftz.f32 	%f22, %f58, %f30;
$Lt_119_25858:
	.loc	22	468	0
	// Alpha:  %f60 = a0 = inAlphaGain * src0.w,   a1 = %f8 = src1.w
	//         %f63 = a0 + a1 - a0*a1              (also kept in %f64 = out.w)
	ld.param.f32 	%f59, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inAlphaGain];
	mul.ftz.f32 	%f60, %f59, %f4;
	add.ftz.f32 	%f61, %f60, %f8;
	mul.ftz.f32 	%f62, %f60, %f8;
	sub.ftz.f32 	%f63, %f61, %f62;
	mov.f32 	%f64, %f63;
	// Treat a combined alpha of at most 8e-6 as fully transparent.
	mov.f32 	%f65, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f66, %f63, %f65;
	mov.f32 	%f67, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f66, %f67;
	@!%p7 bra 	$Lt_119_26626;
	// Transparent result: all four output components are zero.
	mov.f32 	%f68, 0f00000000;    	// 0
	mov.f32 	%f69, 0f00000000;    	// 0
	mov.f32 	%f70, 0f00000000;    	// 0
	mov.f32 	%f64, 0f00000000;    	// 0
	bra.uni 	$Lt_119_26370;
$Lt_119_26626:
	// %f72 = 1 - a1,  %f74 = a0 / aOut,  %f77 = 1 - a0 / aOut
	// (%f76 repeats the %f74 product; the compiler emitted it twice).
	mov.f32 	%f71, 0f3f800000;    	// 1
	sub.ftz.f32 	%f72, %f71, %f8;
	rcp.approx.ftz.f32 	%f73, %f63;
	mul.ftz.f32 	%f74, %f60, %f73;
	mov.f32 	%f75, 0f3f800000;    	// 1
	mul.ftz.f32 	%f76, %f60, %f73;
	sub.ftz.f32 	%f77, %f75, %f76;
	// Per channel: out = src1.c * %f77 + %f74 * (t.c*a1 + (1 - a1)*t.c)
	// where t.c is the clipped blend colour (%f22 / %f24 / %f26).
	mul.ftz.f32 	%f78, %f72, %f22;
	fma.rn.ftz.f32 	%f79, %f22, %f8, %f78;
	mul.ftz.f32 	%f80, %f74, %f79;
	fma.rn.ftz.f32 	%f70, %f5, %f77, %f80;
	mul.ftz.f32 	%f81, %f72, %f24;
	fma.rn.ftz.f32 	%f82, %f24, %f8, %f81;
	mul.ftz.f32 	%f83, %f74, %f82;
	fma.rn.ftz.f32 	%f69, %f6, %f77, %f83;
	mul.ftz.f32 	%f84, %f72, %f26;
	fma.rn.ftz.f32 	%f85, %f26, %f8, %f84;
	mul.ftz.f32 	%f86, %f74, %f85;
	fma.rn.ftz.f32 	%f68, %f7, %f77, %f86;
$Lt_119_26370:
	.loc	22	544	0
	// Destination pixel index: x + inDestPitch * y; format chosen by %p2.
	ld.param.s32 	%r35, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDestPitch];
	mul.lo.s32 	%r36, %r35, %r10;
	add.s32 	%r37, %r8, %r36;
	cvt.s64.s32 	%rd13, %r37;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Luminosity_Kernel_inDest];
	@!%p2 bra 	$Lt_119_27138;
	.loc	21	126	0
	// f16 store: round each component to nearest half, write 8 bytes.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f70;
	mov.b32		%r38, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f69;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f68;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f64;
	mov.b32		%r41, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r38,%r39,%r40,%r41};
	.loc	22	544	0
	bra.uni 	$Lt_119_26882;
$Lt_119_27138:
	.loc	21	126	0
	// f32 store: write 16 bytes unchanged.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f70,%f69,%f68,%f64};
$Lt_119_26882:
$Lt_119_23810:
	.loc	22	544	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Luminosity_Kernel:
	} // BlendMode_IR_BlendMode_Luminosity_Kernel

	.entry BlendMode_IR_BlendMode_Dissolve_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inAlphaGain)
	{
	.reg .u32 %r<91>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<28>;
	.reg .pred %p<7>;
	.loc	22	545	0
$LDWbegin_BlendMode_IR_BlendMode_Dissolve_Kernel:
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inWidth];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inHeight];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_120_22274;
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrc0];
	@!%p2 bra 	$Lt_120_23042;
	.loc	21	115	0
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	545	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_120_22786;
$Lt_120_23042:
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_120_22786:
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inSrc1];
	@!%p2 bra 	$Lt_120_23554;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	545	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_120_23298;
$Lt_120_23554:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_120_23298:
	.loc	22	526	0
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f10, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f12, %f13;
	@!%p3 bra 	$Lt_120_20738;
	.loc	22	528	0
	mov.f32 	%f14, %f5;
	mov.f32 	%f15, %f6;
	mov.f32 	%f16, %f7;
	mov.f32 	%f17, %f8;
	bra.uni 	$LDWendi__Z4Randj_297_1;
$Lt_120_20738:
	.loc	22	530	0
	mov.f32 	%f18, 0f370637bd;    	// 8e-006
	add.ftz.f32 	%f19, %f10, %f18;
	mov.f32 	%f20, 0f3f800000;    	// 1
	setp.ge.ftz.f32 	%p4, %f19, %f20;
	@!%p4 bra 	$Lt_120_20994;
	.loc	22	532	0
	mov.f32 	%f14, %f1;
	mov.f32 	%f15, %f2;
	mov.f32 	%f16, %f3;
	mov.f32 	%f17, %f4;
	bra.uni 	$LDWendi__Z4Randj_297_1;
$Lt_120_20994:
	.loc	21	143	0
	mov.s32 	%r35, 1;
	sub.s32 	%r36, %r35, %r8;
	shr.u32 	%r37, %r10, 13;
	sub.u32 	%r38, %r8, %r10;
	sub.u32 	%r39, %r36, %r10;
	xor.b32 	%r40, %r37, %r39;
	shl.b32 	%r41, %r40, 8;
	sub.u32 	%r42, %r38, %r40;
	sub.u32 	%r43, %r10, %r40;
	xor.b32 	%r44, %r41, %r42;
	shr.u32 	%r45, %r44, 13;
	sub.u32 	%r46, %r43, %r44;
	sub.u32 	%r47, %r40, %r44;
	xor.b32 	%r48, %r45, %r46;
	shr.u32 	%r49, %r48, 12;
	sub.u32 	%r50, %r47, %r48;
	xor.b32 	%r51, %r49, %r50;
	sub.u32 	%r52, %r44, %r48;
	sub.u32 	%r53, %r52, %r51;
	shl.b32 	%r54, %r51, 16;
	xor.b32 	%r55, %r53, %r54;
	.loc	21	144	0
	sub.u32 	%r56, %r48, %r51;
	sub.u32 	%r57, %r56, %r55;
	shr.u32 	%r58, %r55, 5;
	xor.b32 	%r59, %r57, %r58;
	.loc	21	145	0
	sub.u32 	%r60, %r51, %r55;
	sub.u32 	%r61, %r60, %r59;
	shr.u32 	%r62, %r59, 3;
	xor.b32 	%r63, %r61, %r62;
	.loc	21	146	0
	sub.u32 	%r64, %r55, %r59;
	sub.u32 	%r65, %r64, %r63;
	shl.b32 	%r66, %r63, 10;
	xor.b32 	%r67, %r65, %r66;
	.loc	21	147	0
	sub.u32 	%r68, %r59, %r63;
	sub.u32 	%r69, %r68, %r67;
	shr.u32 	%r70, %r67, 15;
	xor.b32 	%r71, %r69, %r70;
	.loc	22	537	0
	mov.f32 	%f21, 0f46fffe00;    	// 32767
	mul.ftz.f32 	%f22, %f10, %f21;
	cvt.rzi.ftz.s32.f32 	%r72, %f22;
	mul.lo.u32 	%r73, %r71, 1103515245;
	add.u32 	%r74, %r73, 12345;
	shr.u32 	%r75, %r74, 16;
	and.b32 	%r76, %r75, 255;
	shl.b32 	%r77, %r76, 7;
	mul.lo.u32 	%r78, %r71, -1029531031;
	sub.u32 	%r79, %r78, 740551042;
	shr.u32 	%r80, %r79, 16;
	and.b32 	%r81, %r80, 255;
	xor.b32 	%r82, %r77, %r81;
	setp.lt.s32 	%p5, %r72, %r82;
	@%p5 bra 	$Lt_120_24066;
	mov.f32 	%f23, %f1;
	mov.f32 	%f24, %f2;
	mov.f32 	%f25, %f3;
	mov.f32 	%f26, %f4;
	bra.uni 	$Lt_120_23810;
$Lt_120_24066:
	mov.f32 	%f23, %f5;
	mov.f32 	%f24, %f6;
	mov.f32 	%f25, %f7;
	mov.f32 	%f26, %f8;
$Lt_120_23810:
	mov.f32 	%f14, %f23;
	mov.f32 	%f15, %f24;
	mov.f32 	%f16, %f25;
	mov.f32 	%f17, %f26;
$LDWendi__Z4Randj_297_1:
	.loc	22	545	0
	ld.param.s32 	%r83, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDestPitch];
	mul.lo.s32 	%r84, %r83, %r10;
	add.s32 	%r85, %r8, %r84;
	cvt.s64.s32 	%rd13, %r85;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_Dissolve_Kernel_inDest];
	@!%p2 bra 	$Lt_120_24578;
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r86, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f15;
	mov.b32		%r87, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r88, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f17;
	mov.b32		%r89, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r86,%r87,%r88,%r89};
	.loc	22	545	0
	bra.uni 	$Lt_120_24322;
$Lt_120_24578:
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f14,%f15,%f16,%f17};
$Lt_120_24322:
$Lt_120_22274:
	.loc	22	545	0
	exit;
$LDWend_BlendMode_IR_BlendMode_Dissolve_Kernel:
	} // BlendMode_IR_BlendMode_Dissolve_Kernel

	//---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_DarkerColor_Kernel
	//
	// One thread per pixel. Reads one 4-component pixel from inSrc0 and one
	// from inSrc1, picks/blends them, and writes one pixel to inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0 / inSrc1 / inDest  - global-memory base addresses.
	//   inSrcPitch0 / inSrcPitch1 / inDestPitch
	//                             - row pitch; combined as (x + pitch * y)
	//                               BEFORE scaling by the pixel size, i.e. the
	//                               pitch is counted in pixels, not bytes.
	//   inDeviceFormat            - 0 selects 4 x 16-bit half pixels (8 bytes);
	//                               any other value selects 4 x f32 (16 bytes).
	//   inWidth / inHeight        - bounds; threads with x >= inWidth or
	//                               y >= inHeight exit without touching memory.
	//                               inWidth also selects the luma table
	//                               (<= 720 -> kRGB32f_To_601YPbPr,
	//                                >  720 -> kRGB32f_To_709YPbPr).
	//   inAlphaGain               - multiplied into component 3 of the src0
	//                               pixel to form a0.
	//
	// Notation used in the comments: src0 = {%f1,%f2,%f3,%f4},
	// src1 = {%f5,%f6,%f7,%f8}, a0 = %f10, a1 = %f8, out = {%f14..%f17}.
	//
	// Result selection:
	//   1. a0 - 8e-6 <= 0                -> out = src1
	//   2. a1 - 8e-6 <= 0                -> out = {src0.0, src0.1, src0.2, a0}
	//   3. luma(src0) <  luma(src1)      -> out = blend(src0, src1) (see below)
	//   4. otherwise                     -> out = src1
	//
	// NOTE(review): the table entry at +0 weights component 2 and the entry
	// at +8 weights component 0, which suggests a B,G,R,A component order -
	// confirm against the definition of the constant tables.
	//---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_DarkerColor_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inAlphaGain)
	{
	.reg .u32 %r<44>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<104>;
	.reg .pred %p<11>;
	.loc	22	608	0
$LDWbegin_BlendMode_IR_BlendMode_DarkerColor_Kernel:
	// Pixel coordinates: %r8 = x = ctaid.x * ntid.x + tid.x,
	//                    %r10 = y = ctaid.y * ntid.y + tid.y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check (signed compares): set.* yields an all-ones mask when
	// true, neg turns that into 1, and the AND is zero unless both
	// inHeight > y and inWidth > x hold. Out-of-range threads jump to exit.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inWidth];
	ld.param.s32 	%r12, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inHeight];
	set.gt.u32.s32 	%r13, %r12, %r10;
	neg.s32 	%r14, %r13;
	set.gt.u32.s32 	%r15, %r11, %r8;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r14, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_121_25858;
	// %p2 = (inDeviceFormat == 0): true selects the 16-bit half path for
	// both loads and the store. %p2 stays live until the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// src0 pixel index = x + inSrcPitch0 * y (32-bit, then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrc0];
	@!%p2 bra 	$Lt_121_26626;
	.loc	21	115	0
	// Half path: 8 bytes per pixel, each 16-bit component widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	608	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_121_26370;
$Lt_121_26626:
	// Float path: 16 bytes per pixel, loaded directly as 4 x f32.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_121_26370:
	// src1 pixel index = x + inSrcPitch1 * y; same two load paths as src0.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inSrc1];
	@!%p2 bra 	$Lt_121_27138;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	608	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_121_26882;
$Lt_121_27138:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_121_26882:
	// a0 = inAlphaGain * src0.3 (%f10).
	// Case 1: a0 - 8e-6 <= 0 -> output is src1 unchanged.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f10, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f12, %f13;
	@!%p3 bra 	$Lt_121_27650;
	mov.f32 	%f14, %f5;
	mov.f32 	%f15, %f6;
	mov.f32 	%f16, %f7;
	mov.f32 	%f17, %f8;
	bra.uni 	$Lt_121_27394;
$Lt_121_27650:
	// Case 2: a1 - 8e-6 <= 0 -> output is src0's first three components
	// with a0 (the gained alpha) as component 3.
	mov.f32 	%f18, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f19, %f8, %f18;
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p4, %f19, %f20;
	@!%p4 bra 	$Lt_121_28162;
	mov.f32 	%f14, %f1;
	mov.f32 	%f15, %f2;
	mov.f32 	%f16, %f3;
	mov.f32 	%f17, %f10;
	bra.uni 	$Lt_121_27906;
$Lt_121_28162:
	// Luma table selection by frame width: inWidth > 720 takes the
	// kRGB32f_To_709YPbPr path, otherwise kRGB32f_To_601YPbPr is used.
	mov.u32 	%r35, 720;
	setp.gt.s32 	%p5, %r11, %r35;
	@%p5 bra 	$Lt_121_28674;
	.loc	22	555	0
	// luma(p) = sat(c[+8]*p.0 + c[+0]*p.2 + c[+4]*p.1), clamped to [0,1]
	// by cvt.sat. %f27 = luma(src0), %f31 = luma(src1).
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f23, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f24, %f2, %f23;
	fma.rn.ftz.f32 	%f25, %f22, %f3, %f24;
	fma.rn.ftz.f32 	%f26, %f21, %f1, %f25;
	cvt.ftz.sat.f32.f32 	%f27, %f26;
	mul.ftz.f32 	%f28, %f6, %f23;
	fma.rn.ftz.f32 	%f29, %f22, %f7, %f28;
	fma.rn.ftz.f32 	%f30, %f21, %f5, %f29;
	cvt.ftz.sat.f32.f32 	%f31, %f30;
	// Darker-colour test: blend only when src0's luma is strictly lower.
	setp.lt.ftz.f32 	%p6, %f27, %f31;
	@!%p6 bra 	$Lt_121_24066;
	.loc	22	468	0
	// Blend: outA (%f34) = a0 + a1 - a0*a1.
	// If outA - 8e-6 <= 0 the whole output pixel is zero.
	add.ftz.f32 	%f32, %f10, %f8;
	mul.ftz.f32 	%f33, %f10, %f8;
	sub.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, %f34;
	mov.f32 	%f36, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f37, %f34, %f36;
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f37, %f38;
	@!%p7 bra 	$Lt_121_29186;
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	mov.f32 	%f35, 0f00000000;    	// 0
	bra.uni 	$Lt_121_28930;
$Lt_121_29186:
	// t (%f45) = a0 * rcp.approx(outA); %f47 recomputes the same product
	// and %f48 = 1 - t. Per component c:
	//   out.c = src1.c * (1 - t) + t * (src0.c * a1 + (1 - a1) * src0.c)
	// The inner term is algebraically src0.c (up to rounding).
	// Results land in %f41 (comp 0), %f40 (comp 1), %f39 (comp 2).
	mov.f32 	%f42, 0f3f800000;    	// 1
	sub.ftz.f32 	%f43, %f42, %f8;
	rcp.approx.ftz.f32 	%f44, %f34;
	mul.ftz.f32 	%f45, %f10, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	mul.ftz.f32 	%f47, %f10, %f44;
	sub.ftz.f32 	%f48, %f46, %f47;
	mul.ftz.f32 	%f49, %f43, %f1;
	fma.rn.ftz.f32 	%f50, %f1, %f8, %f49;
	mul.ftz.f32 	%f51, %f45, %f50;
	fma.rn.ftz.f32 	%f41, %f5, %f48, %f51;
	mul.ftz.f32 	%f52, %f43, %f2;
	fma.rn.ftz.f32 	%f53, %f2, %f8, %f52;
	mul.ftz.f32 	%f54, %f45, %f53;
	fma.rn.ftz.f32 	%f40, %f6, %f48, %f54;
	mul.ftz.f32 	%f55, %f43, %f3;
	fma.rn.ftz.f32 	%f56, %f3, %f8, %f55;
	mul.ftz.f32 	%f57, %f45, %f56;
	fma.rn.ftz.f32 	%f39, %f7, %f48, %f57;
$Lt_121_28930:
	.loc	22	557	0
	// Blended (or zeroed) pixel becomes the 601-path result.
	mov.f32 	%f58, %f41;
	mov.f32 	%f59, %f40;
	mov.f32 	%f60, %f39;
	mov.f32 	%f61, %f35;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_298_3;
$Lt_121_24066:
	.loc	22	561	0
	// src0 is not darker: keep src1 unchanged.
	mov.f32 	%f58, %f5;
	mov.f32 	%f59, %f6;
	mov.f32 	%f60, %f7;
	mov.f32 	%f61, %f8;
$LDWendi__Z10GetLuma6018PixelRGB_298_3:
	.loc	22	608	0
	mov.f32 	%f14, %f58;
	mov.f32 	%f15, %f59;
	mov.f32 	%f16, %f60;
	mov.f32 	%f17, %f61;
	bra.uni 	$Lt_121_28418;
$Lt_121_28674:
	.loc	22	569	0
	// Same luma compare and blend as above, using the
	// kRGB32f_To_709YPbPr weights. %f68 = luma(src0), %f72 = luma(src1).
	ld.const.f32 	%f62, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f63, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f64, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f65, %f2, %f64;
	fma.rn.ftz.f32 	%f66, %f63, %f3, %f65;
	fma.rn.ftz.f32 	%f67, %f62, %f1, %f66;
	cvt.ftz.sat.f32.f32 	%f68, %f67;
	mul.ftz.f32 	%f69, %f6, %f64;
	fma.rn.ftz.f32 	%f70, %f63, %f7, %f69;
	fma.rn.ftz.f32 	%f71, %f62, %f5, %f70;
	cvt.ftz.sat.f32.f32 	%f72, %f71;
	setp.lt.ftz.f32 	%p8, %f68, %f72;
	@!%p8 bra 	$Lt_121_24578;
	.loc	22	468	0
	// outA (%f75) = a0 + a1 - a0*a1; zero pixel when outA - 8e-6 <= 0.
	add.ftz.f32 	%f73, %f10, %f8;
	mul.ftz.f32 	%f74, %f10, %f8;
	sub.ftz.f32 	%f75, %f73, %f74;
	mov.f32 	%f76, %f75;
	mov.f32 	%f77, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f78, %f75, %f77;
	mov.f32 	%f79, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p9, %f78, %f79;
	@!%p9 bra 	$Lt_121_29698;
	mov.f32 	%f80, 0f00000000;    	// 0
	mov.f32 	%f81, 0f00000000;    	// 0
	mov.f32 	%f82, 0f00000000;    	// 0
	mov.f32 	%f76, 0f00000000;    	// 0
	bra.uni 	$Lt_121_29442;
$Lt_121_29698:
	// t (%f86) = a0 * rcp.approx(outA), %f89 = 1 - t; per-component blend
	// identical in form to the 601 path. Results: %f82, %f81, %f80.
	mov.f32 	%f83, 0f3f800000;    	// 1
	sub.ftz.f32 	%f84, %f83, %f8;
	rcp.approx.ftz.f32 	%f85, %f75;
	mul.ftz.f32 	%f86, %f10, %f85;
	mov.f32 	%f87, 0f3f800000;    	// 1
	mul.ftz.f32 	%f88, %f10, %f85;
	sub.ftz.f32 	%f89, %f87, %f88;
	mul.ftz.f32 	%f90, %f84, %f1;
	fma.rn.ftz.f32 	%f91, %f1, %f8, %f90;
	mul.ftz.f32 	%f92, %f86, %f91;
	fma.rn.ftz.f32 	%f82, %f5, %f89, %f92;
	mul.ftz.f32 	%f93, %f84, %f2;
	fma.rn.ftz.f32 	%f94, %f2, %f8, %f93;
	mul.ftz.f32 	%f95, %f86, %f94;
	fma.rn.ftz.f32 	%f81, %f6, %f89, %f95;
	mul.ftz.f32 	%f96, %f84, %f3;
	fma.rn.ftz.f32 	%f97, %f3, %f8, %f96;
	mul.ftz.f32 	%f98, %f86, %f97;
	fma.rn.ftz.f32 	%f80, %f7, %f89, %f98;
$Lt_121_29442:
	.loc	22	571	0
	mov.f32 	%f99, %f82;
	mov.f32 	%f100, %f81;
	mov.f32 	%f101, %f80;
	mov.f32 	%f102, %f76;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_298_1;
$Lt_121_24578:
	.loc	22	575	0
	// src0 is not darker: keep src1 unchanged.
	mov.f32 	%f99, %f5;
	mov.f32 	%f100, %f6;
	mov.f32 	%f101, %f7;
	mov.f32 	%f102, %f8;
$LDWendi__Z10GetLuma7098PixelRGB_298_1:
	.loc	22	608	0
	mov.f32 	%f14, %f99;
	mov.f32 	%f15, %f100;
	mov.f32 	%f16, %f101;
	mov.f32 	%f17, %f102;
$Lt_121_28418:
$Lt_121_27906:
$Lt_121_27394:
	// All result paths merge here with the output pixel in %f14..%f17.
	// dest pixel index = x + inDestPitch * y.
	ld.param.s32 	%r36, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDestPitch];
	mul.lo.s32 	%r37, %r36, %r10;
	add.s32 	%r38, %r8, %r37;
	cvt.s64.s32 	%rd13, %r38;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_DarkerColor_Kernel_inDest];
	@!%p2 bra 	$Lt_121_30210;
	.loc	21	126	0
	// Half path: round each component to f16 (round-to-nearest) and store
	// 4 x 16 bits at an 8-byte stride.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f15;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r41, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f17;
	mov.b32		%r42, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r39,%r40,%r41,%r42};
	.loc	22	608	0
	bra.uni 	$Lt_121_29954;
$Lt_121_30210:
	.loc	21	126	0
	// Float path: store 4 x f32 at a 16-byte stride.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f14,%f15,%f16,%f17};
$Lt_121_29954:
$Lt_121_25858:
	.loc	22	608	0
	exit;
$LDWend_BlendMode_IR_BlendMode_DarkerColor_Kernel:
	} // BlendMode_IR_BlendMode_DarkerColor_Kernel

	//---------------------------------------------------------------------
	// BlendMode_IR_BlendMode_LighterColor_Kernel
	//
	// One thread per pixel. Reads one 4-component pixel from inSrc0 and one
	// from inSrc1, picks/blends them, and writes one pixel to inDest.
	//
	// Parameters (as used by the code below):
	//   inSrc0 / inSrc1 / inDest  - global-memory base addresses.
	//   inSrcPitch0 / inSrcPitch1 / inDestPitch
	//                             - row pitch; combined as (x + pitch * y)
	//                               BEFORE scaling by the pixel size, i.e. the
	//                               pitch is counted in pixels, not bytes.
	//   inDeviceFormat            - 0 selects 4 x 16-bit half pixels (8 bytes);
	//                               any other value selects 4 x f32 (16 bytes).
	//   inWidth / inHeight        - bounds; threads with x >= inWidth or
	//                               y >= inHeight exit without touching memory.
	//                               inWidth also selects the luma table
	//                               (<= 720 -> kRGB32f_To_601YPbPr,
	//                                >  720 -> kRGB32f_To_709YPbPr).
	//   inAlphaGain               - multiplied into component 3 of the src0
	//                               pixel to form a0.
	//
	// Notation used in the comments: src0 = {%f1,%f2,%f3,%f4},
	// src1 = {%f5,%f6,%f7,%f8}, a0 = %f10, a1 = %f8, out = {%f14..%f17}.
	//
	// Result selection:
	//   1. a0 - 8e-6 <= 0                -> out = src1
	//   2. a1 - 8e-6 <= 0                -> out = {src0.0, src0.1, src0.2, a0}
	//   3. luma(src0) >  luma(src1)      -> out = blend(src0, src1) (see below)
	//   4. otherwise                     -> out = src1
	//
	// NOTE(review): the table entry at +0 weights component 2 and the entry
	// at +8 weights component 0, which suggests a B,G,R,A component order -
	// confirm against the definition of the constant tables.
	//---------------------------------------------------------------------
	.entry BlendMode_IR_BlendMode_LighterColor_Kernel (
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrc0,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrcPitch0,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrc1,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrcPitch1,
		.param .u64 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDest,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDestPitch,
		.param .u32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDeviceFormat,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inWidth,
		.param .s32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inHeight,
		.param .f32 __cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inAlphaGain)
	{
	.reg .u32 %r<44>;
	.reg .u64 %rd<20>;
	.reg .f32 %f<104>;
	.reg .pred %p<11>;
	.loc	22	609	0
$LDWbegin_BlendMode_IR_BlendMode_LighterColor_Kernel:
	// Pixel coordinates: %r8 = x = ctaid.x * ntid.x + tid.x,
	//                    %r10 = y = ctaid.y * ntid.y + tid.y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds check (signed compares): set.* yields an all-ones mask when
	// true, neg turns that into 1, and the AND is zero unless both
	// inHeight > y and inWidth > x hold. Out-of-range threads jump to exit.
	ld.param.s32 	%r11, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inWidth];
	ld.param.s32 	%r12, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inHeight];
	set.gt.u32.s32 	%r13, %r12, %r10;
	neg.s32 	%r14, %r13;
	set.gt.u32.s32 	%r15, %r11, %r8;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r14, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_122_25858;
	// %p2 = (inDeviceFormat == 0): true selects the 16-bit half path for
	// both loads and the store. %p2 stays live until the final store.
	ld.param.s32 	%r19, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// src0 pixel index = x + inSrcPitch0 * y (32-bit, then sign-extended).
	ld.param.s32 	%r21, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrcPitch0];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrc0];
	@!%p2 bra 	$Lt_122_26626;
	.loc	21	115	0
	// Half path: 8 bytes per pixel, each 16-bit component widened to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	22	609	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_122_26370;
$Lt_122_26626:
	// Float path: 16 bytes per pixel, loaded directly as 4 x f32.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_122_26370:
	// src1 pixel index = x + inSrcPitch1 * y; same two load paths as src0.
	ld.param.s32 	%r28, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrcPitch1];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inSrc1];
	@!%p2 bra 	$Lt_122_27138;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r31,%r32,%r33,%r34}, [%rd10+0];
	.loc	22	609	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f5, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f6, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f7, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r34;
	cvt.ftz.f32.f16	%f8, %b1; }
	bra.uni 	$Lt_122_26882;
$Lt_122_27138:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f5,%f6,%f7,%f8}, [%rd12+0];
$Lt_122_26882:
	// a0 = inAlphaGain * src0.3 (%f10).
	// Case 1: a0 - 8e-6 <= 0 -> output is src1 unchanged.
	ld.param.f32 	%f9, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inAlphaGain];
	mul.ftz.f32 	%f10, %f9, %f4;
	mov.f32 	%f11, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f12, %f10, %f11;
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p3, %f12, %f13;
	@!%p3 bra 	$Lt_122_27650;
	mov.f32 	%f14, %f5;
	mov.f32 	%f15, %f6;
	mov.f32 	%f16, %f7;
	mov.f32 	%f17, %f8;
	bra.uni 	$Lt_122_27394;
$Lt_122_27650:
	// Case 2: a1 - 8e-6 <= 0 -> output is src0's first three components
	// with a0 (the gained alpha) as component 3.
	mov.f32 	%f18, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f19, %f8, %f18;
	mov.f32 	%f20, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p4, %f19, %f20;
	@!%p4 bra 	$Lt_122_28162;
	mov.f32 	%f14, %f1;
	mov.f32 	%f15, %f2;
	mov.f32 	%f16, %f3;
	mov.f32 	%f17, %f10;
	bra.uni 	$Lt_122_27906;
$Lt_122_28162:
	// Luma table selection by frame width: inWidth > 720 takes the
	// kRGB32f_To_709YPbPr path, otherwise kRGB32f_To_601YPbPr is used.
	mov.u32 	%r35, 720;
	setp.gt.s32 	%p5, %r11, %r35;
	@%p5 bra 	$Lt_122_28674;
	.loc	22	584	0
	// luma(p) = sat(c[+8]*p.0 + c[+0]*p.2 + c[+4]*p.1), clamped to [0,1]
	// by cvt.sat. %f27 = luma(src0), %f31 = luma(src1).
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f23, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f24, %f2, %f23;
	fma.rn.ftz.f32 	%f25, %f22, %f3, %f24;
	fma.rn.ftz.f32 	%f26, %f21, %f1, %f25;
	cvt.ftz.sat.f32.f32 	%f27, %f26;
	mul.ftz.f32 	%f28, %f6, %f23;
	fma.rn.ftz.f32 	%f29, %f22, %f7, %f28;
	fma.rn.ftz.f32 	%f30, %f21, %f5, %f29;
	cvt.ftz.sat.f32.f32 	%f31, %f30;
	// Lighter-colour test: blend only when src0's luma is strictly higher.
	setp.gt.ftz.f32 	%p6, %f27, %f31;
	@!%p6 bra 	$Lt_122_24066;
	.loc	22	468	0
	// Blend: outA (%f34) = a0 + a1 - a0*a1.
	// If outA - 8e-6 <= 0 the whole output pixel is zero.
	add.ftz.f32 	%f32, %f10, %f8;
	mul.ftz.f32 	%f33, %f10, %f8;
	sub.ftz.f32 	%f34, %f32, %f33;
	mov.f32 	%f35, %f34;
	mov.f32 	%f36, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f37, %f34, %f36;
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p7, %f37, %f38;
	@!%p7 bra 	$Lt_122_29186;
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, 0f00000000;    	// 0
	mov.f32 	%f41, 0f00000000;    	// 0
	mov.f32 	%f35, 0f00000000;    	// 0
	bra.uni 	$Lt_122_28930;
$Lt_122_29186:
	// t (%f45) = a0 * rcp.approx(outA); %f47 recomputes the same product
	// and %f48 = 1 - t. Per component c:
	//   out.c = src1.c * (1 - t) + t * (src0.c * a1 + (1 - a1) * src0.c)
	// The inner term is algebraically src0.c (up to rounding).
	// Results land in %f41 (comp 0), %f40 (comp 1), %f39 (comp 2).
	mov.f32 	%f42, 0f3f800000;    	// 1
	sub.ftz.f32 	%f43, %f42, %f8;
	rcp.approx.ftz.f32 	%f44, %f34;
	mul.ftz.f32 	%f45, %f10, %f44;
	mov.f32 	%f46, 0f3f800000;    	// 1
	mul.ftz.f32 	%f47, %f10, %f44;
	sub.ftz.f32 	%f48, %f46, %f47;
	mul.ftz.f32 	%f49, %f43, %f1;
	fma.rn.ftz.f32 	%f50, %f1, %f8, %f49;
	mul.ftz.f32 	%f51, %f45, %f50;
	fma.rn.ftz.f32 	%f41, %f5, %f48, %f51;
	mul.ftz.f32 	%f52, %f43, %f2;
	fma.rn.ftz.f32 	%f53, %f2, %f8, %f52;
	mul.ftz.f32 	%f54, %f45, %f53;
	fma.rn.ftz.f32 	%f40, %f6, %f48, %f54;
	mul.ftz.f32 	%f55, %f43, %f3;
	fma.rn.ftz.f32 	%f56, %f3, %f8, %f55;
	mul.ftz.f32 	%f57, %f45, %f56;
	fma.rn.ftz.f32 	%f39, %f7, %f48, %f57;
$Lt_122_28930:
	.loc	22	586	0
	// Blended (or zeroed) pixel becomes the 601-path result.
	mov.f32 	%f58, %f41;
	mov.f32 	%f59, %f40;
	mov.f32 	%f60, %f39;
	mov.f32 	%f61, %f35;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_299_3;
$Lt_122_24066:
	.loc	22	590	0
	// src0 is not lighter: keep src1 unchanged.
	mov.f32 	%f58, %f5;
	mov.f32 	%f59, %f6;
	mov.f32 	%f60, %f7;
	mov.f32 	%f61, %f8;
$LDWendi__Z10GetLuma6018PixelRGB_299_3:
	.loc	22	609	0
	mov.f32 	%f14, %f58;
	mov.f32 	%f15, %f59;
	mov.f32 	%f16, %f60;
	mov.f32 	%f17, %f61;
	bra.uni 	$Lt_122_28418;
$Lt_122_28674:
	.loc	22	598	0
	// Same luma compare and blend as above, using the
	// kRGB32f_To_709YPbPr weights. %f68 = luma(src0), %f72 = luma(src1).
	ld.const.f32 	%f62, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f63, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f64, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f65, %f2, %f64;
	fma.rn.ftz.f32 	%f66, %f63, %f3, %f65;
	fma.rn.ftz.f32 	%f67, %f62, %f1, %f66;
	cvt.ftz.sat.f32.f32 	%f68, %f67;
	mul.ftz.f32 	%f69, %f6, %f64;
	fma.rn.ftz.f32 	%f70, %f63, %f7, %f69;
	fma.rn.ftz.f32 	%f71, %f62, %f5, %f70;
	cvt.ftz.sat.f32.f32 	%f72, %f71;
	setp.gt.ftz.f32 	%p8, %f68, %f72;
	@!%p8 bra 	$Lt_122_24578;
	.loc	22	468	0
	// outA (%f75) = a0 + a1 - a0*a1; zero pixel when outA - 8e-6 <= 0.
	add.ftz.f32 	%f73, %f10, %f8;
	mul.ftz.f32 	%f74, %f10, %f8;
	sub.ftz.f32 	%f75, %f73, %f74;
	mov.f32 	%f76, %f75;
	mov.f32 	%f77, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f78, %f75, %f77;
	mov.f32 	%f79, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p9, %f78, %f79;
	@!%p9 bra 	$Lt_122_29698;
	mov.f32 	%f80, 0f00000000;    	// 0
	mov.f32 	%f81, 0f00000000;    	// 0
	mov.f32 	%f82, 0f00000000;    	// 0
	mov.f32 	%f76, 0f00000000;    	// 0
	bra.uni 	$Lt_122_29442;
$Lt_122_29698:
	// t (%f86) = a0 * rcp.approx(outA), %f89 = 1 - t; per-component blend
	// identical in form to the 601 path. Results: %f82, %f81, %f80.
	mov.f32 	%f83, 0f3f800000;    	// 1
	sub.ftz.f32 	%f84, %f83, %f8;
	rcp.approx.ftz.f32 	%f85, %f75;
	mul.ftz.f32 	%f86, %f10, %f85;
	mov.f32 	%f87, 0f3f800000;    	// 1
	mul.ftz.f32 	%f88, %f10, %f85;
	sub.ftz.f32 	%f89, %f87, %f88;
	mul.ftz.f32 	%f90, %f84, %f1;
	fma.rn.ftz.f32 	%f91, %f1, %f8, %f90;
	mul.ftz.f32 	%f92, %f86, %f91;
	fma.rn.ftz.f32 	%f82, %f5, %f89, %f92;
	mul.ftz.f32 	%f93, %f84, %f2;
	fma.rn.ftz.f32 	%f94, %f2, %f8, %f93;
	mul.ftz.f32 	%f95, %f86, %f94;
	fma.rn.ftz.f32 	%f81, %f6, %f89, %f95;
	mul.ftz.f32 	%f96, %f84, %f3;
	fma.rn.ftz.f32 	%f97, %f3, %f8, %f96;
	mul.ftz.f32 	%f98, %f86, %f97;
	fma.rn.ftz.f32 	%f80, %f7, %f89, %f98;
$Lt_122_29442:
	.loc	22	600	0
	mov.f32 	%f99, %f82;
	mov.f32 	%f100, %f81;
	mov.f32 	%f101, %f80;
	mov.f32 	%f102, %f76;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_299_1;
$Lt_122_24578:
	.loc	22	604	0
	// src0 is not lighter: keep src1 unchanged.
	mov.f32 	%f99, %f5;
	mov.f32 	%f100, %f6;
	mov.f32 	%f101, %f7;
	mov.f32 	%f102, %f8;
$LDWendi__Z10GetLuma7098PixelRGB_299_1:
	.loc	22	609	0
	mov.f32 	%f14, %f99;
	mov.f32 	%f15, %f100;
	mov.f32 	%f16, %f101;
	mov.f32 	%f17, %f102;
$Lt_122_28418:
$Lt_122_27906:
$Lt_122_27394:
	// All result paths merge here with the output pixel in %f14..%f17.
	// dest pixel index = x + inDestPitch * y.
	ld.param.s32 	%r36, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDestPitch];
	mul.lo.s32 	%r37, %r36, %r10;
	add.s32 	%r38, %r8, %r37;
	cvt.s64.s32 	%rd13, %r38;
	ld.param.u64 	%rd14, [__cudaparm_BlendMode_IR_BlendMode_LighterColor_Kernel_inDest];
	@!%p2 bra 	$Lt_122_30210;
	.loc	21	126	0
	// Half path: round each component to f16 (round-to-nearest) and store
	// 4 x 16 bits at an 8-byte stride.
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f14;
	mov.b32		%r39, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f15;
	mov.b32		%r40, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f16;
	mov.b32		%r41, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f17;
	mov.b32		%r42, %b1; }
	st.global.v4.u16 	[%rd16+0], {%r39,%r40,%r41,%r42};
	.loc	22	609	0
	bra.uni 	$Lt_122_29954;
$Lt_122_30210:
	.loc	21	126	0
	// Float path: store 4 x f32 at a 16-byte stride.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	st.global.v4.f32 	[%rd18+0], {%f14,%f15,%f16,%f17};
$Lt_122_29954:
$Lt_122_25858:
	.loc	22	609	0
	exit;
$LDWend_BlendMode_IR_BlendMode_LighterColor_Kernel:
	} // BlendMode_IR_BlendMode_LighterColor_Kernel
	.global .texref sBilinearTexture;

	.entry MotionKernel (
		.param .u64 __cudaparm_MotionKernel_inSrc,
		.param .s32 __cudaparm_MotionKernel_inSrcOffset,
		.param .s32 __cudaparm_MotionKernel_inSrcPitch,
		.param .s32 __cudaparm_MotionKernel_inSrcWidth,
		.param .s32 __cudaparm_MotionKernel_inSrcHeight,
		.param .s32 __cudaparm_MotionKernel_inSrcPARNumerator,
		.param .s32 __cudaparm_MotionKernel_inSrcPARDenominator,
		.param .u64 __cudaparm_MotionKernel_inDest,
		.param .s32 __cudaparm_MotionKernel_inDestPitch,
		.param .s32 __cudaparm_MotionKernel_inDestWidth,
		.param .s32 __cudaparm_MotionKernel_inDestHeight,
		.param .u32 __cudaparm_MotionKernel_inDeviceFormat,
		.param .f32 __cudaparm_MotionKernel_inPositionX,
		.param .f32 __cudaparm_MotionKernel_inPositionY,
		.param .f32 __cudaparm_MotionKernel_inAnchorX,
		.param .f32 __cudaparm_MotionKernel_inAnchorY,
		.param .f32 __cudaparm_MotionKernel_inRecipScaleX,
		.param .f32 __cudaparm_MotionKernel_inRecipScaleY,
		.param .f32 __cudaparm_MotionKernel_inRotationRadians,
		.param .f32 __cudaparm_MotionKernel_inPARCorrection,
		.param .f32 __cudaparm_MotionKernel_inAlphaGain,
		.param .u32 __cudaparm_MotionKernel_inBlendMode,
		.param .s8 __cudaparm_MotionKernel_inDoCompositeOver,
		.param .s32 __cudaparm_MotionKernel_inQuality)
	{
	.reg .u32 %r<127>;
	.reg .u64 %rd<19>;
	.reg .f32 %f<1763>;
	.reg .pred %p<149>;
	.loc	6	283	0
$LDWbegin_MotionKernel:
	.loc	6	122	0
	cvt.s32.u32 	%r1, %ctaid.y;
	cvt.s32.u32 	%r2, %ntid.y;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.y;
	add.u32 	%r5, %r3, %r4;
	cvt.rn.f32.s32 	%f1, %r5;
	ld.param.f32 	%f2, [__cudaparm_MotionKernel_inPositionY];
	add.ftz.f32 	%f3, %f1, %f2;
	.loc	6	124	0
	cvt.s32.u32 	%r6, %ctaid.x;
	cvt.s32.u32 	%r7, %ntid.x;
	mul.lo.s32 	%r8, %r6, %r7;
	mov.u32 	%r9, %tid.x;
	add.u32 	%r10, %r8, %r9;
	ld.param.f32 	%f4, [__cudaparm_MotionKernel_inPARCorrection];
	cvt.rn.f32.s32 	%f5, %r10;
	ld.param.f32 	%f6, [__cudaparm_MotionKernel_inPositionX];
	add.ftz.f32 	%f7, %f5, %f6;
	mul.ftz.f32 	%f8, %f4, %f7;
	ld.param.f32 	%f9, [__cudaparm_MotionKernel_inRotationRadians];
	abs.ftz.f32 	%f10, %f9;
	mov.f32 	%f11, 0f370637bd;    	// 8e-006
	setp.lt.ftz.f32 	%p1, %f10, %f11;
	@%p1 bra 	$Lt_123_269826;
	.loc	27	529	0
	ld.param.s32 	%r11, [__cudaparm_MotionKernel_inSrcPARNumerator];
	cvt.rn.f32.s32 	%f12, %r11;
	ld.param.s32 	%r12, [__cudaparm_MotionKernel_inSrcPARDenominator];
	cvt.rn.f32.s32 	%f13, %r12;
	mul.ftz.f32 	%f14, %f12, %f8;
	div.approx.ftz.f32 	%f15, %f14, %f13;
	.loc	6	134	0
	cos.approx.ftz.f32 	%f16, %f9;
	sin.approx.ftz.f32 	%f17, %f9;
	mul.ftz.f32 	%f18, %f16, %f15;
	fma.rn.ftz.f32 	%f19, %f3, %f17, %f18;
	mul.ftz.f32 	%f20, %f13, %f19;
	div.approx.ftz.f32 	%f8, %f20, %f12;
	.loc	6	135	0
	mul.ftz.f32 	%f21, %f17, %f15;
	mul.ftz.f32 	%f22, %f3, %f16;
	sub.ftz.f32 	%f3, %f22, %f21;
$Lt_123_269826:
	.loc	6	139	0
	ld.param.f32 	%f23, [__cudaparm_MotionKernel_inRecipScaleX];
	mul.ftz.f32 	%f8, %f23, %f8;
	.loc	6	140	0
	ld.param.f32 	%f24, [__cudaparm_MotionKernel_inRecipScaleY];
	mul.ftz.f32 	%f3, %f24, %f3;
	.loc	6	139	0
	ld.param.s32 	%r13, [__cudaparm_MotionKernel_inDestWidth];
	setp.le.s32 	%p2, %r13, %r10;
	@%p2 bra 	$Lt_123_317954;
	ld.param.s32 	%r14, [__cudaparm_MotionKernel_inDestHeight];
	setp.le.s32 	%p3, %r14, %r5;
	@%p3 bra 	$Lt_123_317954;
	ld.param.f32 	%f25, [__cudaparm_MotionKernel_inAnchorX];
	add.ftz.f32 	%f26, %f25, %f8;
	mov.f32 	%f27, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p4, %f26, %f27;
	@!%p4 bra 	$Lt_123_317954;
	ld.param.s32 	%r15, [__cudaparm_MotionKernel_inSrcWidth];
	cvt.rn.f32.s32 	%f28, %r15;
	setp.gt.ftz.f32 	%p5, %f28, %f26;
	@!%p5 bra 	$Lt_123_317954;
	ld.param.f32 	%f29, [__cudaparm_MotionKernel_inAnchorY];
	add.ftz.f32 	%f30, %f29, %f3;
	mov.f32 	%f31, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p6, %f30, %f31;
	@!%p6 bra 	$Lt_123_317954;
	ld.param.s32 	%r16, [__cudaparm_MotionKernel_inSrcHeight];
	cvt.rn.f32.s32 	%f32, %r16;
	setp.gt.ftz.f32 	%p7, %f32, %f30;
	@!%p7 bra 	$Lt_123_317954;
	ld.param.s32 	%r17, [__cudaparm_MotionKernel_inDeviceFormat];
	mov.s32 	%r18, 0;
	setp.eq.s32 	%p8, %r17, %r18;
	ld.param.s32 	%r19, [__cudaparm_MotionKernel_inQuality];
	mov.u32 	%r20, 1;
	setp.le.s32 	%p9, %r19, %r20;
	@%p9 bra 	$Lt_123_270594;
	ld.param.s32 	%r21, [__cudaparm_MotionKernel_inSrcOffset];
	cvt.rn.f32.s32 	%f33, %r21;
	add.ftz.f32 	%f34, %f33, %f26;
	mov.f32 	%f35, %f34;
	mov.f32 	%f36, %f30;
	mov.f32 	%f37, 0f00000000;    	// 0
	mov.f32 	%f38, %f37;
	mov.f32 	%f39, 0f00000000;    	// 0
	mov.f32 	%f40, %f39;
	tex.2d.v4.f32.f32 {%f41,%f42,%f43,%f44},[sBilinearTexture,{%f35,%f36,%f38,%f40}];
	.loc	6	313	0
	mov.f32 	%f45, %f41;
	mov.f32 	%f46, %f42;
	mov.f32 	%f47, %f43;
	mov.f32 	%f48, %f44;
	mov.f32 	%f49, %f45;
	mov.f32 	%f50, %f46;
	mov.f32 	%f51, %f47;
	mov.f32 	%f52, %f48;
	bra.uni 	$Lt_123_270338;
$Lt_123_270594:
	ld.param.u64 	%rd1, [__cudaparm_MotionKernel_inSrc];
	ld.param.s32 	%r22, [__cudaparm_MotionKernel_inSrcPitch];
	@!%p8 bra 	$Lt_123_271106;
	.loc	21	115	0
	cvt.rzi.ftz.s32.f32 	%r23, %f26;
	cvt.rzi.ftz.s32.f32 	%r24, %f30;
	mul.lo.s32 	%r25, %r24, %r22;
	add.s32 	%r26, %r23, %r25;
	cvt.s64.s32 	%rd2, %r26;
	mul.wide.s32 	%rd3, %r26, 8;
	add.u64 	%rd4, %rd1, %rd3;
	ld.global.v4.u16 	{%r27,%r28,%r29,%r30}, [%rd4+0];
	.loc	6	318	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f53, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f54, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f55, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r30;
	cvt.ftz.f32.f16	%f56, %b1; }
	bra.uni 	$Lt_123_270850;
$Lt_123_271106:
	cvt.rzi.ftz.s32.f32 	%r31, %f26;
	cvt.rzi.ftz.s32.f32 	%r32, %f30;
	mul.lo.s32 	%r33, %r32, %r22;
	add.s32 	%r34, %r31, %r33;
	cvt.s64.s32 	%rd5, %r34;
	mul.wide.s32 	%rd6, %r34, 16;
	add.u64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f53,%f54,%f55,%f56}, [%rd7+0];
$Lt_123_270850:
	mov.f32 	%f49, %f53;
	mov.f32 	%f50, %f54;
	mov.f32 	%f51, %f55;
	mov.f32 	%f52, %f56;
$Lt_123_270338:
	.loc	6	321	0
	mov.f32 	%f57, %f49;
	mov.f32 	%f58, %f50;
	mov.f32 	%f59, %f51;
	mov.f32 	%f60, %f52;
	ld.param.u32 	%r35, [__cudaparm_MotionKernel_inBlendMode];
	mov.s32 	%r36, 18;
	setp.eq.s32 	%p10, %r35, %r36;
	ld.param.s32 	%r37, [__cudaparm_MotionKernel_inDestPitch];
	mul.lo.s32 	%r38, %r37, %r5;
	add.s32 	%r39, %r10, %r38;
	cvt.s64.s32 	%rd8, %r39;
	ld.param.u64 	%rd9, [__cudaparm_MotionKernel_inDest];
	ld.param.s8 	%r40, [__cudaparm_MotionKernel_inDoCompositeOver];
	mov.u32 	%r41, 0;
	setp.eq.s32 	%p11, %r40, %r41;
	@%p11 bra 	$Lt_123_271618;
	@!%p8 bra 	$Lt_123_272130;
	.loc	21	115	0
	mul.lo.u64 	%rd10, %rd8, 8;
	add.u64 	%rd11, %rd9, %rd10;
	ld.global.v4.u16 	{%r42,%r43,%r44,%r45}, [%rd11+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r42;
	cvt.ftz.f32.f16	%f61, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r43;
	cvt.ftz.f32.f16	%f62, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r44;
	cvt.ftz.f32.f16	%f63, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r45;
	cvt.ftz.f32.f16	%f64, %b1; }
	bra.uni 	$Lt_123_271874;
$Lt_123_272130:
	mul.lo.u64 	%rd12, %rd8, 16;
	add.u64 	%rd13, %rd9, %rd12;
	ld.global.v4.f32 	{%f61,%f62,%f63,%f64}, [%rd13+0];
$Lt_123_271874:
	@!%p10 bra 	$Lt_123_272642;
	.loc	5	255	0
	mov.f32 	%f65, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p12, %f57, %f65;
	@!%p12 bra 	$Lt_123_272898;
	.loc	5	234	0
	neg.ftz.f32 	%f66, %f57;
	lg2.approx.ftz.f32 	%f67, %f66;
	mov.f32 	%f68, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f69, %f67, %f68;
	ex2.approx.ftz.f32 	%f70, %f69;
	neg.ftz.f32 	%f71, %f70;
	bra.uni 	$LDWendi___log2f_300_80;
$Lt_123_272898:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f72, %f57;
	mov.f32 	%f73, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f74, %f72, %f73;
	ex2.approx.ftz.f32 	%f71, %f74;
$LDWendi___log2f_300_80:
	.loc	5	256	0
	mov.f32 	%f75, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f58, %f75;
	@!%p13 bra 	$Lt_123_273410;
	.loc	5	234	0
	neg.ftz.f32 	%f76, %f58;
	lg2.approx.ftz.f32 	%f77, %f76;
	mov.f32 	%f78, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f79, %f77, %f78;
	ex2.approx.ftz.f32 	%f80, %f79;
	neg.ftz.f32 	%f81, %f80;
	bra.uni 	$LDWendi___log2f_300_78;
$Lt_123_273410:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f82, %f58;
	mov.f32 	%f83, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f84, %f82, %f83;
	ex2.approx.ftz.f32 	%f81, %f84;
$LDWendi___log2f_300_78:
	.loc	5	257	0
	mov.f32 	%f85, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f59, %f85;
	@!%p14 bra 	$Lt_123_273922;
	.loc	5	234	0
	neg.ftz.f32 	%f86, %f59;
	lg2.approx.ftz.f32 	%f87, %f86;
	mov.f32 	%f88, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f89, %f87, %f88;
	ex2.approx.ftz.f32 	%f90, %f89;
	neg.ftz.f32 	%f91, %f90;
	bra.uni 	$LDWendi___log2f_300_76;
$Lt_123_273922:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f92, %f59;
	mov.f32 	%f93, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f94, %f92, %f93;
	ex2.approx.ftz.f32 	%f91, %f94;
$LDWendi___log2f_300_76:
	.loc	22	83	0
	cvt.ftz.sat.f32.f32 	%f95, %f60;
	cvt.ftz.sat.f32.f32 	%f96, %f64;
	ld.param.f32 	%f97, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f98, %f97, %f95;
	cvt.ftz.sat.f32.f32 	%f99, %f98;
	mov.f32 	%f100, 0f3f800000;   	// 1
	sub.ftz.f32 	%f101, %f100, %f99;
	mul.ftz.f32 	%f102, %f96, %f101;
	add.ftz.f32 	%f103, %f99, %f102;
	mov.f32 	%f104, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f105, %f103, %f104;
	mov.f32 	%f106, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p15, %f105, %f106;
	@!%p15 bra 	$Lt_123_274690;
	mov.f32 	%f107, 0f00000000;   	// 0
	mov.f32 	%f108, 0f00000000;   	// 0
	mov.f32 	%f109, 0f00000000;   	// 0
	mov.f32 	%f110, 0f00000000;   	// 0
	bra.uni 	$Lt_123_274434;
$Lt_123_274690:
	mov.f32 	%f111, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p16, %f61, %f111;
	@!%p16 bra 	$Lt_123_274946;
	.loc	5	234	0
	neg.ftz.f32 	%f112, %f61;
	lg2.approx.ftz.f32 	%f113, %f112;
	mov.f32 	%f114, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f115, %f113, %f114;
	ex2.approx.ftz.f32 	%f116, %f115;
	neg.ftz.f32 	%f117, %f116;
	bra.uni 	$LDWendi___log2f_300_74;
$Lt_123_274946:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f118, %f61;
	mov.f32 	%f119, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f120, %f118, %f119;
	ex2.approx.ftz.f32 	%f117, %f120;
$LDWendi___log2f_300_74:
	.loc	22	97	0
	mov.f32 	%f121, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p17, %f62, %f121;
	@!%p17 bra 	$Lt_123_275458;
	.loc	5	234	0
	neg.ftz.f32 	%f122, %f62;
	lg2.approx.ftz.f32 	%f123, %f122;
	mov.f32 	%f124, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f125, %f123, %f124;
	ex2.approx.ftz.f32 	%f126, %f125;
	neg.ftz.f32 	%f127, %f126;
	bra.uni 	$LDWendi___log2f_300_72;
$Lt_123_275458:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f128, %f62;
	mov.f32 	%f129, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f130, %f128, %f129;
	ex2.approx.ftz.f32 	%f127, %f130;
$LDWendi___log2f_300_72:
	.loc	22	98	0
	mov.f32 	%f131, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p18, %f63, %f131;
	@!%p18 bra 	$Lt_123_275970;
	.loc	5	234	0
	neg.ftz.f32 	%f132, %f63;
	lg2.approx.ftz.f32 	%f133, %f132;
	mov.f32 	%f134, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f135, %f133, %f134;
	ex2.approx.ftz.f32 	%f136, %f135;
	neg.ftz.f32 	%f137, %f136;
	bra.uni 	$LDWendi___log2f_300_70;
$Lt_123_275970:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f138, %f63;
	mov.f32 	%f139, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f140, %f138, %f139;
	ex2.approx.ftz.f32 	%f137, %f140;
$LDWendi___log2f_300_70:
	.loc	5	208	0
	cvt.ftz.sat.f32.f32 	%f141, %f103;
	mov.f32 	%f142, %f141;
	mov.f32 	%f143, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f144, %f141, %f143;
	mov.f32 	%f145, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p19, %f144, %f145;
	@%p19 bra 	$Lt_123_276738;
	.loc	5	213	0
	rcp.approx.ftz.f32 	%f146, %f141;
	mul.ftz.f32 	%f147, %f91, %f99;
	fma.rn.ftz.f32 	%f148, %f102, %f137, %f147;
	mul.ftz.f32 	%f149, %f146, %f148;
	.loc	5	214	0
	mul.ftz.f32 	%f150, %f81, %f99;
	fma.rn.ftz.f32 	%f151, %f102, %f127, %f150;
	mul.ftz.f32 	%f152, %f146, %f151;
	.loc	5	215	0
	mul.ftz.f32 	%f153, %f71, %f99;
	fma.rn.ftz.f32 	%f154, %f102, %f117, %f153;
	mul.ftz.f32 	%f155, %f146, %f154;
	bra.uni 	$Lt_123_276482;
$Lt_123_276738:
	.loc	5	219	0
	mov.f32 	%f149, 0f00000000;   	// 0
	mov.f32 	%f152, 0f00000000;   	// 0
	mov.f32 	%f155, 0f00000000;   	// 0
	mov.f32 	%f142, 0f00000000;   	// 0
$Lt_123_276482:
	.loc	5	266	0
	mov.f32 	%f156, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p20, %f155, %f156;
	@!%p20 bra 	$Lt_123_276994;
	.loc	5	242	0
	neg.ftz.f32 	%f157, %f155;
	lg2.approx.ftz.f32 	%f158, %f157;
	mov.f32 	%f159, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f160, %f158, %f159;
	ex2.approx.ftz.f32 	%f161, %f160;
	neg.ftz.f32 	%f162, %f161;
	bra.uni 	$LDWendi___log2f_300_68;
$Lt_123_276994:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f163, %f155;
	mov.f32 	%f164, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f165, %f163, %f164;
	ex2.approx.ftz.f32 	%f162, %f165;
$LDWendi___log2f_300_68:
	.loc	5	267	0
	mov.f32 	%f166, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p21, %f152, %f166;
	@!%p21 bra 	$Lt_123_277506;
	.loc	5	242	0
	neg.ftz.f32 	%f167, %f152;
	lg2.approx.ftz.f32 	%f168, %f167;
	mov.f32 	%f169, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f170, %f168, %f169;
	ex2.approx.ftz.f32 	%f171, %f170;
	neg.ftz.f32 	%f172, %f171;
	bra.uni 	$LDWendi___log2f_300_66;
$Lt_123_277506:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f173, %f152;
	mov.f32 	%f174, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f175, %f173, %f174;
	ex2.approx.ftz.f32 	%f172, %f175;
$LDWendi___log2f_300_66:
	.loc	5	268	0
	mov.f32 	%f176, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p22, %f149, %f176;
	@!%p22 bra 	$Lt_123_278018;
	.loc	5	242	0
	neg.ftz.f32 	%f177, %f149;
	lg2.approx.ftz.f32 	%f178, %f177;
	mov.f32 	%f179, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f180, %f178, %f179;
	ex2.approx.ftz.f32 	%f181, %f180;
	neg.ftz.f32 	%f182, %f181;
	bra.uni 	$LDWendi___log2f_300_64;
$Lt_123_278018:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f183, %f149;
	mov.f32 	%f184, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f185, %f183, %f184;
	ex2.approx.ftz.f32 	%f182, %f185;
$LDWendi___log2f_300_64:
	.loc	22	101	0
	mov.f32 	%f110, %f162;
	mov.f32 	%f109, %f172;
	mov.f32 	%f108, %f182;
	mov.f32 	%f107, %f142;
$Lt_123_274434:
	.loc	6	170	0
	mov.f32 	%f57, %f110;
	mov.f32 	%f58, %f109;
	mov.f32 	%f59, %f108;
	mov.f32 	%f60, %f107;
	bra.uni 	$Lt_123_315906;
$Lt_123_272642:
	// Selector dispatch: a linear chain of equality tests on %r35.
	// Each step loads one constant into a fresh register, compares it with
	// %r35 and, on a match, branches to the arm label for that value.
	// The order of the tested values (3, 11, 17, 22, 6, ...) is as emitted,
	// not numeric.
	// NOTE(review): %r35 is written outside this view - presumably a
	// blend-mode enum; confirm against the source at .loc 6 line 189.
	.loc	6	189	0
	mov.u32 	%r46, 3;
	setp.eq.s32 	%p23, %r35, %r46;
	@%p23 bra 	$Lt_123_258;
	mov.u32 	%r47, 11;
	setp.eq.s32 	%p24, %r35, %r47;
	@%p24 bra 	$Lt_123_770;
	mov.u32 	%r48, 17;
	setp.eq.s32 	%p25, %r35, %r48;
	@%p25 bra 	$Lt_123_1026;
	mov.u32 	%r49, 22;
	setp.eq.s32 	%p26, %r35, %r49;
	@%p26 bra 	$Lt_123_1282;
	mov.u32 	%r50, 6;
	setp.eq.s32 	%p27, %r35, %r50;
	@%p27 bra 	$Lt_123_1538;
	mov.u32 	%r51, 1;
	setp.eq.s32 	%p28, %r35, %r51;
	@%p28 bra 	$Lt_123_1794;
	mov.u32 	%r52, 13;
	setp.eq.s32 	%p29, %r35, %r52;
	@%p29 bra 	$Lt_123_2050;
	mov.u32 	%r53, 4;
	setp.eq.s32 	%p30, %r35, %r53;
	@%p30 bra 	$Lt_123_2306;
	mov.u32 	%r54, 2;
	setp.eq.s32 	%p31, %r35, %r54;
	@%p31 bra 	$Lt_123_2562;
	mov.u32 	%r55, 14;
	setp.eq.s32 	%p32, %r35, %r55;
	@%p32 bra 	$Lt_123_2818;
	mov.u32 	%r56, 12;
	setp.eq.s32 	%p33, %r35, %r56;
	@%p33 bra 	$Lt_123_3074;
	mov.u32 	%r57, 19;
	setp.eq.s32 	%p34, %r35, %r57;
	@%p34 bra 	$Lt_123_3330;
	mov.u32 	%r58, 23;
	setp.eq.s32 	%p35, %r35, %r58;
	@%p35 bra 	$Lt_123_3586;
	mov.u32 	%r59, 8;
	setp.eq.s32 	%p36, %r35, %r59;
	@%p36 bra 	$Lt_123_3842;
	mov.u32 	%r60, 24;
	setp.eq.s32 	%p37, %r35, %r60;
	@%p37 bra 	$Lt_123_4098;
	mov.u32 	%r61, 15;
	setp.eq.s32 	%p38, %r35, %r61;
	@%p38 bra 	$Lt_123_4354;
	mov.u32 	%r62, 20;
	setp.eq.s32 	%p39, %r35, %r62;
	@%p39 bra 	$Lt_123_4610;
	mov.u32 	%r63, 9;
	setp.eq.s32 	%p40, %r35, %r63;
	@%p40 bra 	$Lt_123_4866;
	mov.u32 	%r64, 5;
	setp.eq.s32 	%p41, %r35, %r64;
	@%p41 bra 	$Lt_123_5122;
	mov.u32 	%r65, 7;
	setp.eq.s32 	%p42, %r35, %r65;
	@%p42 bra 	$Lt_123_5378;
	mov.u32 	%r66, 25;
	setp.eq.s32 	%p43, %r35, %r66;
	@%p43 bra 	$Lt_123_5634;
	mov.u32 	%r67, 26;
	setp.eq.s32 	%p44, %r35, %r67;
	@%p44 bra 	$Lt_123_5890;
	mov.u32 	%r68, 10;
	setp.eq.s32 	%p45, %r35, %r68;
	@%p45 bra 	$Lt_123_6146;
	mov.u32 	%r69, 21;
	setp.eq.s32 	%p46, %r35, %r69;
	@%p46 bra 	$Lt_123_6402;
	mov.u32 	%r70, 0;
	setp.eq.s32 	%p47, %r35, %r70;
	@%p47 bra 	$Lt_123_6658;
	mov.u32 	%r71, 16;
	setp.eq.s32 	%p48, %r35, %r71;
	@%p48 bra 	$Lt_123_6914;
	// No tested value matched: go straight to $Lt_123_315906 without
	// writing any float register in this chain.
	bra.uni 	$Lt_123_315906;
$Lt_123_258:
	// Arm for %r35 == 3. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm: min(A, B).
	// NOTE(review): %f60 / %f64 are used like alpha values and the first
	// three components like colour channels - confirm against the source at
	// .loc 22 line 469.
	.loc	22	469	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f186, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f186, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f191 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f189, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f189;
	mov.f32 	%f191, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros; this also keeps the
	// rcp below away from a (near-)zero input.
	mov.f32 	%f192, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f193, %f190, %f192;
	mov.f32 	%f194, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p49, %f193, %f194;
	@!%p49 bra 	$Lt_123_278786;
	mov.f32 	%f195, 0f00000000;   	// 0
	mov.f32 	%f196, 0f00000000;   	// 0
	mov.f32 	%f197, 0f00000000;   	// 0
	mov.f32 	%f191, 0f00000000;   	// 0
	bra.uni 	$Lt_123_278530;
$Lt_123_278786:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f198, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f198, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f202, 0f3f800000;   	// 1
	mul.ftz.f32 	%f203, %f187, %f200;
	sub.ftz.f32 	%f204, %f202, %f203;
	// Component 0: %f197 = %f61*%f204 + %f201*(%f57*%f199 + %f64*min(%f57,%f61))
	min.ftz.f32 	%f205, %f57, %f61;
	mul.ftz.f32 	%f206, %f64, %f205;
	fma.rn.ftz.f32 	%f207, %f57, %f199, %f206;
	mul.ftz.f32 	%f208, %f201, %f207;
	fma.rn.ftz.f32 	%f197, %f61, %f204, %f208;
	// Component 1: same formula on %f58 / %f62 -> %f196
	min.ftz.f32 	%f209, %f58, %f62;
	mul.ftz.f32 	%f210, %f64, %f209;
	fma.rn.ftz.f32 	%f211, %f58, %f199, %f210;
	mul.ftz.f32 	%f212, %f201, %f211;
	fma.rn.ftz.f32 	%f196, %f62, %f204, %f212;
	// Component 2: same formula on %f59 / %f63 -> %f195
	min.ftz.f32 	%f213, %f59, %f63;
	mul.ftz.f32 	%f214, %f64, %f213;
	fma.rn.ftz.f32 	%f215, %f59, %f199, %f214;
	mul.ftz.f32 	%f216, %f201, %f215;
	fma.rn.ftz.f32 	%f195, %f63, %f204, %f216;
$Lt_123_278530:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	191	0
	mov.f32 	%f57, %f197;
	mov.f32 	%f58, %f196;
	mov.f32 	%f59, %f195;
	mov.f32 	%f60, %f191;
	bra.uni 	$Lt_123_315906;
$Lt_123_770:
	// Arm for %r35 == 11. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm: max(A, B).
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 470.
	.loc	22	470	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f217, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f217, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f219 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f218, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f218;
	mov.f32 	%f219, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f220, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f221, %f190, %f220;
	mov.f32 	%f222, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f221, %f222;
	@!%p50 bra 	$Lt_123_279298;
	mov.f32 	%f223, 0f00000000;   	// 0
	mov.f32 	%f224, 0f00000000;   	// 0
	mov.f32 	%f225, 0f00000000;   	// 0
	mov.f32 	%f219, 0f00000000;   	// 0
	bra.uni 	$Lt_123_279042;
$Lt_123_279298:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f226, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f226, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f227, 0f3f800000;   	// 1
	mul.ftz.f32 	%f228, %f187, %f200;
	sub.ftz.f32 	%f204, %f227, %f228;
	// Component 0: %f225 = %f61*%f204 + %f201*(%f57*%f199 + %f64*max(%f57,%f61))
	max.ftz.f32 	%f229, %f57, %f61;
	mul.ftz.f32 	%f230, %f64, %f229;
	fma.rn.ftz.f32 	%f231, %f57, %f199, %f230;
	mul.ftz.f32 	%f232, %f201, %f231;
	fma.rn.ftz.f32 	%f225, %f61, %f204, %f232;
	// Component 1: same formula on %f58 / %f62 -> %f224
	max.ftz.f32 	%f233, %f58, %f62;
	mul.ftz.f32 	%f234, %f64, %f233;
	fma.rn.ftz.f32 	%f235, %f58, %f199, %f234;
	mul.ftz.f32 	%f236, %f201, %f235;
	fma.rn.ftz.f32 	%f224, %f62, %f204, %f236;
	// Component 2: same formula on %f59 / %f63 -> %f223
	max.ftz.f32 	%f237, %f59, %f63;
	mul.ftz.f32 	%f238, %f64, %f237;
	fma.rn.ftz.f32 	%f239, %f59, %f199, %f238;
	mul.ftz.f32 	%f240, %f201, %f239;
	fma.rn.ftz.f32 	%f223, %f63, %f204, %f240;
$Lt_123_279042:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	192	0
	mov.f32 	%f57, %f225;
	mov.f32 	%f58, %f224;
	mov.f32 	%f59, %f223;
	mov.f32 	%f60, %f219;
	bra.uni 	$Lt_123_315906;
$Lt_123_1026:
	// Arm for %r35 == 17. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm: A * B, clamped to [0, 1].
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 471.
	.loc	22	471	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f241, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f241, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f243 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f242, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f242;
	mov.f32 	%f243, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f244, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f245, %f190, %f244;
	mov.f32 	%f246, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p51, %f245, %f246;
	@!%p51 bra 	$Lt_123_279810;
	mov.f32 	%f247, 0f00000000;   	// 0
	mov.f32 	%f248, 0f00000000;   	// 0
	mov.f32 	%f249, 0f00000000;   	// 0
	mov.f32 	%f243, 0f00000000;   	// 0
	bra.uni 	$Lt_123_279554;
$Lt_123_279810:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f250, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f250, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f251, 0f3f800000;   	// 1
	mul.ftz.f32 	%f252, %f187, %f200;
	sub.ftz.f32 	%f204, %f251, %f252;
	// Component 0: t = min(max(%f57*%f61, 0), 1);
	//   %f249 = %f61*%f204 + %f201*(%f57*%f199 + %f64*t)
	mul.ftz.f32 	%f253, %f57, %f61;
	mov.f32 	%f254, 0f00000000;   	// 0
	max.ftz.f32 	%f255, %f253, %f254;
	mov.f32 	%f256, 0f3f800000;   	// 1
	min.ftz.f32 	%f257, %f255, %f256;
	mul.ftz.f32 	%f258, %f64, %f257;
	fma.rn.ftz.f32 	%f259, %f57, %f199, %f258;
	mul.ftz.f32 	%f260, %f201, %f259;
	fma.rn.ftz.f32 	%f249, %f61, %f204, %f260;
	// Component 1: same formula on %f58 / %f62 -> %f248
	mul.ftz.f32 	%f261, %f58, %f62;
	mov.f32 	%f262, 0f00000000;   	// 0
	max.ftz.f32 	%f263, %f261, %f262;
	mov.f32 	%f264, 0f3f800000;   	// 1
	min.ftz.f32 	%f265, %f263, %f264;
	mul.ftz.f32 	%f266, %f64, %f265;
	fma.rn.ftz.f32 	%f267, %f58, %f199, %f266;
	mul.ftz.f32 	%f268, %f201, %f267;
	fma.rn.ftz.f32 	%f248, %f62, %f204, %f268;
	// Component 2: same formula on %f59 / %f63 -> %f247
	mul.ftz.f32 	%f269, %f59, %f63;
	mov.f32 	%f270, 0f00000000;   	// 0
	max.ftz.f32 	%f271, %f269, %f270;
	mov.f32 	%f272, 0f3f800000;   	// 1
	min.ftz.f32 	%f273, %f271, %f272;
	mul.ftz.f32 	%f274, %f64, %f273;
	fma.rn.ftz.f32 	%f275, %f59, %f199, %f274;
	mul.ftz.f32 	%f276, %f201, %f275;
	fma.rn.ftz.f32 	%f247, %f63, %f204, %f276;
$Lt_123_279554:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	193	0
	mov.f32 	%f57, %f249;
	mov.f32 	%f58, %f248;
	mov.f32 	%f59, %f247;
	mov.f32 	%f60, %f243;
	bra.uni 	$Lt_123_315906;
$Lt_123_1282:
	// Arm for %r35 == 22. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm:
	//   1 - (1 - clamp(B, 1e-7, 1)) * (1 - clamp(A, 1e-7, 1)), clamped to [0, 1].
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 472.
	.loc	22	472	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f277, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f277, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f279 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f278, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f278;
	mov.f32 	%f279, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f280, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f281, %f190, %f280;
	mov.f32 	%f282, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p52, %f281, %f282;
	@!%p52 bra 	$Lt_123_280322;
	mov.f32 	%f283, 0f00000000;   	// 0
	mov.f32 	%f284, 0f00000000;   	// 0
	mov.f32 	%f285, 0f00000000;   	// 0
	mov.f32 	%f279, 0f00000000;   	// 0
	bra.uni 	$Lt_123_280066;
$Lt_123_280322:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f286, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f286, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f287, 0f3f800000;   	// 1
	mul.ftz.f32 	%f288, %f187, %f200;
	sub.ftz.f32 	%f204, %f287, %f288;
	// Component 0:
	//   %f295 = 1 - min(max(%f61, 1e-7), 1)
	//   %f301 = 1 - min(max(%f57, 1e-7), 1)
	//   t     = min(max(1 - %f295*%f301, 0), 1)
	//   %f285 = %f61*%f204 + %f201*(%f57*%f199 + %f64*t)
	mov.f32 	%f289, 0f3f800000;   	// 1
	mov.f32 	%f290, 0f3f800000;   	// 1
	mov.f32 	%f291, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f292, %f61, %f291;
	mov.f32 	%f293, 0f3f800000;   	// 1
	min.ftz.f32 	%f294, %f292, %f293;
	sub.ftz.f32 	%f295, %f290, %f294;
	mov.f32 	%f296, 0f3f800000;   	// 1
	mov.f32 	%f297, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f298, %f57, %f297;
	mov.f32 	%f299, 0f3f800000;   	// 1
	min.ftz.f32 	%f300, %f298, %f299;
	sub.ftz.f32 	%f301, %f296, %f300;
	mul.ftz.f32 	%f302, %f295, %f301;
	sub.ftz.f32 	%f303, %f289, %f302;
	mov.f32 	%f304, 0f00000000;   	// 0
	max.ftz.f32 	%f305, %f303, %f304;
	mov.f32 	%f306, 0f3f800000;   	// 1
	min.ftz.f32 	%f307, %f305, %f306;
	mul.ftz.f32 	%f308, %f64, %f307;
	fma.rn.ftz.f32 	%f309, %f57, %f199, %f308;
	mul.ftz.f32 	%f310, %f201, %f309;
	fma.rn.ftz.f32 	%f285, %f61, %f204, %f310;
	// Component 1: same formula on %f58 / %f62 -> %f284
	mov.f32 	%f311, 0f3f800000;   	// 1
	mov.f32 	%f312, 0f3f800000;   	// 1
	mov.f32 	%f313, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f314, %f62, %f313;
	mov.f32 	%f315, 0f3f800000;   	// 1
	min.ftz.f32 	%f316, %f314, %f315;
	sub.ftz.f32 	%f317, %f312, %f316;
	mov.f32 	%f318, 0f3f800000;   	// 1
	mov.f32 	%f319, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f320, %f58, %f319;
	mov.f32 	%f321, 0f3f800000;   	// 1
	min.ftz.f32 	%f322, %f320, %f321;
	sub.ftz.f32 	%f323, %f318, %f322;
	mul.ftz.f32 	%f324, %f317, %f323;
	sub.ftz.f32 	%f325, %f311, %f324;
	mov.f32 	%f326, 0f00000000;   	// 0
	max.ftz.f32 	%f327, %f325, %f326;
	mov.f32 	%f328, 0f3f800000;   	// 1
	min.ftz.f32 	%f329, %f327, %f328;
	mul.ftz.f32 	%f330, %f64, %f329;
	fma.rn.ftz.f32 	%f331, %f58, %f199, %f330;
	mul.ftz.f32 	%f332, %f201, %f331;
	fma.rn.ftz.f32 	%f284, %f62, %f204, %f332;
	// Component 2: same formula on %f59 / %f63 -> %f283
	mov.f32 	%f333, 0f3f800000;   	// 1
	mov.f32 	%f334, 0f3f800000;   	// 1
	mov.f32 	%f335, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f336, %f63, %f335;
	mov.f32 	%f337, 0f3f800000;   	// 1
	min.ftz.f32 	%f338, %f336, %f337;
	sub.ftz.f32 	%f339, %f334, %f338;
	mov.f32 	%f340, 0f3f800000;   	// 1
	mov.f32 	%f341, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f342, %f59, %f341;
	mov.f32 	%f343, 0f3f800000;   	// 1
	min.ftz.f32 	%f344, %f342, %f343;
	sub.ftz.f32 	%f345, %f340, %f344;
	mul.ftz.f32 	%f346, %f339, %f345;
	sub.ftz.f32 	%f347, %f333, %f346;
	mov.f32 	%f348, 0f00000000;   	// 0
	max.ftz.f32 	%f349, %f347, %f348;
	mov.f32 	%f350, 0f3f800000;   	// 1
	min.ftz.f32 	%f351, %f349, %f350;
	mul.ftz.f32 	%f352, %f64, %f351;
	fma.rn.ftz.f32 	%f353, %f59, %f199, %f352;
	mul.ftz.f32 	%f354, %f201, %f353;
	fma.rn.ftz.f32 	%f283, %f63, %f204, %f354;
$Lt_123_280066:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	194	0
	mov.f32 	%f57, %f285;
	mov.f32 	%f58, %f284;
	mov.f32 	%f59, %f283;
	mov.f32 	%f60, %f279;
	bra.uni 	$Lt_123_315906;
$Lt_123_1538:
	// Arm for %r35 == 6. No arithmetic mixing of the two values: the result
	// is either A = (%f57, %f58, %f59, %f60) or B = (%f61, %f62, %f63, %f64),
	// chosen by comparing a threshold derived from aA with a hashed integer.
	// NOTE(review): the labels reference Rand(unsigned) and %r5 / %r10 are
	// set outside this view (presumably pixel coordinates or a seed) -
	// confirm against the source at .loc 22 lines 526-537.
	.loc	22	526	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f355, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f355, %f60;
	// Case 1: aA - 8e-6 <= 0  ->  result is B unchanged.
	mov.f32 	%f356, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f357, %f187, %f356;
	mov.f32 	%f358, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p53, %f357, %f358;
	@!%p53 bra 	$Lt_123_235522;
	.loc	22	528	0
	mov.f32 	%f359, %f61;
	mov.f32 	%f360, %f62;
	mov.f32 	%f361, %f63;
	mov.f32 	%f362, %f64;
	bra.uni 	$LDWendi__Z4Randj_300_56;
$Lt_123_235522:
	// Case 2: aA + 8e-6 >= 1  ->  result is A unchanged (note: the fourth
	// component written is %f60, not the gain-scaled %f187).
	.loc	22	530	0
	mov.f32 	%f363, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f364, %f187, %f363;
	mov.f32 	%f365, 0f3f800000;   	// 1
	setp.ge.ftz.f32 	%p54, %f364, %f365;
	@!%p54 bra 	$Lt_123_235778;
	.loc	22	532	0
	mov.f32 	%f359, %f57;
	mov.f32 	%f360, %f58;
	mov.f32 	%f361, %f59;
	mov.f32 	%f362, %f60;
	bra.uni 	$LDWendi__Z4Randj_300_56;
$Lt_123_235778:
	// Case 3: hash %r5 and %r10 into %r108 with a sequence of
	// subtract / shift / xor rounds (all arithmetic wraps modulo 2^32).
	.loc	21	143	0
	shr.u32 	%r72, %r5, 13;
	mov.s32 	%r73, 1;
	sub.s32 	%r74, %r73, %r10;
	sub.u32 	%r75, %r10, %r5;
	sub.u32 	%r76, %r74, %r5;
	xor.b32 	%r77, %r72, %r76;
	shl.b32 	%r78, %r77, 8;
	sub.u32 	%r79, %r75, %r77;
	sub.u32 	%r80, %r5, %r77;
	xor.b32 	%r81, %r78, %r79;
	shr.u32 	%r82, %r81, 13;
	sub.u32 	%r83, %r80, %r81;
	sub.u32 	%r84, %r77, %r81;
	xor.b32 	%r85, %r82, %r83;
	shr.u32 	%r86, %r85, 12;
	sub.u32 	%r87, %r84, %r85;
	xor.b32 	%r88, %r86, %r87;
	sub.u32 	%r89, %r81, %r85;
	sub.u32 	%r90, %r89, %r88;
	shl.b32 	%r91, %r88, 16;
	xor.b32 	%r92, %r90, %r91;
	.loc	21	144	0
	sub.u32 	%r93, %r85, %r88;
	sub.u32 	%r94, %r93, %r92;
	shr.u32 	%r95, %r92, 5;
	xor.b32 	%r96, %r94, %r95;
	.loc	21	145	0
	sub.u32 	%r97, %r88, %r92;
	sub.u32 	%r98, %r97, %r96;
	shr.u32 	%r99, %r96, 3;
	xor.b32 	%r100, %r98, %r99;
	.loc	21	146	0
	sub.u32 	%r101, %r92, %r96;
	sub.u32 	%r102, %r101, %r100;
	shl.b32 	%r103, %r100, 10;
	xor.b32 	%r104, %r102, %r103;
	.loc	21	147	0
	sub.u32 	%r105, %r96, %r100;
	sub.u32 	%r106, %r105, %r104;
	shr.u32 	%r107, %r104, 15;
	xor.b32 	%r108, %r106, %r107;
	.loc	22	537	0
	// Threshold: %r109 = trunc-toward-zero(aA * 32767).
	mov.f32 	%f366, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f367, %f187, %f366;
	cvt.rzi.ftz.s32.f32 	%r109, %f367;
	// First byte: bits 16..23 of (%r108 * 1103515245 + 12345), moved up 7 bits.
	mul.lo.u32 	%r110, %r108, 1103515245;
	add.u32 	%r111, %r110, 12345;
	shr.u32 	%r112, %r111, 16;
	and.b32 	%r113, %r112, 255;
	shl.b32 	%r114, %r113, 7;
	// Second byte: bits 16..23 of (%r108 * -1029531031 - 740551042).
	// NOTE(review): these constants look like the multiply/add step above
	// applied twice (mod 2^32) - confirm.
	mul.lo.u32 	%r115, %r108, -1029531031;
	sub.u32 	%r116, %r115, 740551042;
	shr.u32 	%r117, %r116, 16;
	and.b32 	%r118, %r117, 255;
	// %r119 = xor of the two pieces, a value in 0..32767.
	xor.b32 	%r119, %r114, %r118;
	// threshold < %r119 -> take B, otherwise take A.
	setp.lt.s32 	%p55, %r109, %r119;
	@%p55 bra 	$Lt_123_280834;
	mov.f32 	%f368, %f57;
	mov.f32 	%f369, %f58;
	mov.f32 	%f370, %f59;
	mov.f32 	%f371, %f60;
	bra.uni 	$Lt_123_280578;
$Lt_123_280834:
	mov.f32 	%f368, %f61;
	mov.f32 	%f369, %f62;
	mov.f32 	%f370, %f63;
	mov.f32 	%f371, %f64;
$Lt_123_280578:
	mov.f32 	%f359, %f368;
	mov.f32 	%f360, %f369;
	mov.f32 	%f361, %f370;
	mov.f32 	%f362, %f371;
$LDWendi__Z4Randj_300_56:
	// All three cases join here: store the chosen value into %f57..%f60
	// and leave through the common exit label.
	.loc	6	195	0
	mov.f32 	%f57, %f359;
	mov.f32 	%f58, %f360;
	mov.f32 	%f59, %f361;
	mov.f32 	%f60, %f362;
	bra.uni 	$Lt_123_315906;
$Lt_123_1794:
	// Arm for %r35 == 1. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm:
	//   1 - (1 - clamp(B, 0, 1)) / clamp(A, 1e-7, 1), clamped to [0, 1].
	// The 1e-7 lower bound on A keeps the divisor non-zero.
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 473.
	.loc	22	473	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f372, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f372, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f374 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f373, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f373;
	mov.f32 	%f374, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f375, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f376, %f190, %f375;
	mov.f32 	%f377, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p56, %f376, %f377;
	@!%p56 bra 	$Lt_123_281346;
	mov.f32 	%f378, 0f00000000;   	// 0
	mov.f32 	%f379, 0f00000000;   	// 0
	mov.f32 	%f380, 0f00000000;   	// 0
	mov.f32 	%f374, 0f00000000;   	// 0
	bra.uni 	$Lt_123_281090;
$Lt_123_281346:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f381, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f381, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f382, 0f3f800000;   	// 1
	mul.ftz.f32 	%f383, %f187, %f200;
	sub.ftz.f32 	%f204, %f382, %f383;
	// Component 0:
	//   %f390 = 1 - min(max(%f61, 0), 1)
	//   %f394 = min(max(%f57, 1e-7), 1)
	//   t     = min(max(1 - %f390 / %f394, 0), 1)
	//   %f380 = %f61*%f204 + %f201*(%f57*%f199 + %f64*t)
	mov.f32 	%f384, 0f3f800000;   	// 1
	mov.f32 	%f385, 0f3f800000;   	// 1
	mov.f32 	%f386, 0f00000000;   	// 0
	max.ftz.f32 	%f387, %f61, %f386;
	mov.f32 	%f388, 0f3f800000;   	// 1
	min.ftz.f32 	%f389, %f387, %f388;
	sub.ftz.f32 	%f390, %f385, %f389;
	mov.f32 	%f391, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f392, %f57, %f391;
	mov.f32 	%f393, 0f3f800000;   	// 1
	min.ftz.f32 	%f394, %f392, %f393;
	div.approx.ftz.f32 	%f395, %f390, %f394;
	sub.ftz.f32 	%f396, %f384, %f395;
	mov.f32 	%f397, 0f00000000;   	// 0
	max.ftz.f32 	%f398, %f396, %f397;
	mov.f32 	%f399, 0f3f800000;   	// 1
	min.ftz.f32 	%f400, %f398, %f399;
	mul.ftz.f32 	%f401, %f64, %f400;
	fma.rn.ftz.f32 	%f402, %f57, %f199, %f401;
	mul.ftz.f32 	%f403, %f201, %f402;
	fma.rn.ftz.f32 	%f380, %f61, %f204, %f403;
	// Component 1: same formula on %f58 / %f62 -> %f379
	mov.f32 	%f404, 0f3f800000;   	// 1
	mov.f32 	%f405, 0f3f800000;   	// 1
	mov.f32 	%f406, 0f00000000;   	// 0
	max.ftz.f32 	%f407, %f62, %f406;
	mov.f32 	%f408, 0f3f800000;   	// 1
	min.ftz.f32 	%f409, %f407, %f408;
	sub.ftz.f32 	%f410, %f405, %f409;
	mov.f32 	%f411, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f412, %f58, %f411;
	mov.f32 	%f413, 0f3f800000;   	// 1
	min.ftz.f32 	%f414, %f412, %f413;
	div.approx.ftz.f32 	%f415, %f410, %f414;
	sub.ftz.f32 	%f416, %f404, %f415;
	mov.f32 	%f417, 0f00000000;   	// 0
	max.ftz.f32 	%f418, %f416, %f417;
	mov.f32 	%f419, 0f3f800000;   	// 1
	min.ftz.f32 	%f420, %f418, %f419;
	mul.ftz.f32 	%f421, %f64, %f420;
	fma.rn.ftz.f32 	%f422, %f58, %f199, %f421;
	mul.ftz.f32 	%f423, %f201, %f422;
	fma.rn.ftz.f32 	%f379, %f62, %f204, %f423;
	// Component 2: same formula on %f59 / %f63 -> %f378
	mov.f32 	%f424, 0f3f800000;   	// 1
	mov.f32 	%f425, 0f3f800000;   	// 1
	mov.f32 	%f426, 0f00000000;   	// 0
	max.ftz.f32 	%f427, %f63, %f426;
	mov.f32 	%f428, 0f3f800000;   	// 1
	min.ftz.f32 	%f429, %f427, %f428;
	sub.ftz.f32 	%f430, %f425, %f429;
	mov.f32 	%f431, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f432, %f59, %f431;
	mov.f32 	%f433, 0f3f800000;   	// 1
	min.ftz.f32 	%f434, %f432, %f433;
	div.approx.ftz.f32 	%f435, %f430, %f434;
	sub.ftz.f32 	%f436, %f424, %f435;
	mov.f32 	%f437, 0f00000000;   	// 0
	max.ftz.f32 	%f438, %f436, %f437;
	mov.f32 	%f439, 0f3f800000;   	// 1
	min.ftz.f32 	%f440, %f438, %f439;
	mul.ftz.f32 	%f441, %f64, %f440;
	fma.rn.ftz.f32 	%f442, %f59, %f199, %f441;
	mul.ftz.f32 	%f443, %f201, %f442;
	fma.rn.ftz.f32 	%f378, %f63, %f204, %f443;
$Lt_123_281090:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	196	0
	mov.f32 	%f57, %f380;
	mov.f32 	%f58, %f379;
	mov.f32 	%f59, %f378;
	mov.f32 	%f60, %f374;
	bra.uni 	$Lt_123_315906;
$Lt_123_2050:
	// Arm for %r35 == 13. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm:
	//   clamp(A, 0, 1) + clamp(B, 0, 1) - 1, clamped to [0, 1].
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 474.
	.loc	22	474	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f444, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f444, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f446 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f445, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f445;
	mov.f32 	%f446, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f447, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f448, %f190, %f447;
	mov.f32 	%f449, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p57, %f448, %f449;
	@!%p57 bra 	$Lt_123_281858;
	mov.f32 	%f450, 0f00000000;   	// 0
	mov.f32 	%f451, 0f00000000;   	// 0
	mov.f32 	%f452, 0f00000000;   	// 0
	mov.f32 	%f446, 0f00000000;   	// 0
	bra.uni 	$Lt_123_281602;
$Lt_123_281858:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f453, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f453, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f454, 0f3f800000;   	// 1
	mul.ftz.f32 	%f455, %f187, %f200;
	sub.ftz.f32 	%f204, %f454, %f455;
	// Component 0:
	//   t     = min(max(min(max(%f57,0),1) + min(max(%f61,0),1) - 1, 0), 1)
	//   %f452 = %f61*%f204 + %f201*(%f57*%f199 + %f64*t)
	mov.f32 	%f456, 0f00000000;   	// 0
	max.ftz.f32 	%f457, %f57, %f456;
	mov.f32 	%f458, 0f3f800000;   	// 1
	min.ftz.f32 	%f459, %f457, %f458;
	mov.f32 	%f460, 0f00000000;   	// 0
	max.ftz.f32 	%f461, %f61, %f460;
	mov.f32 	%f462, 0f3f800000;   	// 1
	min.ftz.f32 	%f463, %f461, %f462;
	add.ftz.f32 	%f464, %f459, %f463;
	mov.f32 	%f465, 0fbf800000;   	// -1
	add.ftz.f32 	%f466, %f464, %f465;
	mov.f32 	%f467, 0f00000000;   	// 0
	max.ftz.f32 	%f468, %f466, %f467;
	mov.f32 	%f469, 0f3f800000;   	// 1
	min.ftz.f32 	%f470, %f468, %f469;
	mul.ftz.f32 	%f471, %f64, %f470;
	fma.rn.ftz.f32 	%f472, %f57, %f199, %f471;
	mul.ftz.f32 	%f473, %f201, %f472;
	fma.rn.ftz.f32 	%f452, %f61, %f204, %f473;
	// Component 1: same formula on %f58 / %f62 -> %f451
	mov.f32 	%f474, 0f00000000;   	// 0
	max.ftz.f32 	%f475, %f58, %f474;
	mov.f32 	%f476, 0f3f800000;   	// 1
	min.ftz.f32 	%f477, %f475, %f476;
	mov.f32 	%f478, 0f00000000;   	// 0
	max.ftz.f32 	%f479, %f62, %f478;
	mov.f32 	%f480, 0f3f800000;   	// 1
	min.ftz.f32 	%f481, %f479, %f480;
	add.ftz.f32 	%f482, %f477, %f481;
	mov.f32 	%f483, 0fbf800000;   	// -1
	add.ftz.f32 	%f484, %f482, %f483;
	mov.f32 	%f485, 0f00000000;   	// 0
	max.ftz.f32 	%f486, %f484, %f485;
	mov.f32 	%f487, 0f3f800000;   	// 1
	min.ftz.f32 	%f488, %f486, %f487;
	mul.ftz.f32 	%f489, %f64, %f488;
	fma.rn.ftz.f32 	%f490, %f58, %f199, %f489;
	mul.ftz.f32 	%f491, %f201, %f490;
	fma.rn.ftz.f32 	%f451, %f62, %f204, %f491;
	// Component 2: same formula on %f59 / %f63 -> %f450
	mov.f32 	%f492, 0f00000000;   	// 0
	max.ftz.f32 	%f493, %f59, %f492;
	mov.f32 	%f494, 0f3f800000;   	// 1
	min.ftz.f32 	%f495, %f493, %f494;
	mov.f32 	%f496, 0f00000000;   	// 0
	max.ftz.f32 	%f497, %f63, %f496;
	mov.f32 	%f498, 0f3f800000;   	// 1
	min.ftz.f32 	%f499, %f497, %f498;
	add.ftz.f32 	%f500, %f495, %f499;
	mov.f32 	%f501, 0fbf800000;   	// -1
	add.ftz.f32 	%f502, %f500, %f501;
	mov.f32 	%f503, 0f00000000;   	// 0
	max.ftz.f32 	%f504, %f502, %f503;
	mov.f32 	%f505, 0f3f800000;   	// 1
	min.ftz.f32 	%f506, %f504, %f505;
	mul.ftz.f32 	%f507, %f64, %f506;
	fma.rn.ftz.f32 	%f508, %f59, %f199, %f507;
	mul.ftz.f32 	%f509, %f201, %f508;
	fma.rn.ftz.f32 	%f450, %f63, %f204, %f509;
$Lt_123_281602:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	197	0
	mov.f32 	%f57, %f452;
	mov.f32 	%f58, %f451;
	mov.f32 	%f59, %f450;
	mov.f32 	%f60, %f446;
	bra.uni 	$Lt_123_315906;
$Lt_123_2306:
	// Arm for %r35 == 4. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Decision tree:
	//   1. aA - 8e-6 <= 0    -> result = B
	//   2. %f64 - 8e-6 <= 0  -> result = (%f57, %f58, %f59, aA)
	//   3. otherwise compute a saturated weighted sum ("luma") of A and of B,
	//      using the kRGB32f_To_601YPbPr table when %r13 <= 720 and
	//      kRGB32f_To_709YPbPr when %r13 > 720. If luma(A) < luma(B), A is
	//      composited over B; otherwise the result is B.
	// NOTE(review): %r13 is set outside this view - presumably a frame
	// dimension used to choose between SD and HD coefficients; confirm.
	.loc	6	198	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f510, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f510, %f60;
	mov.f32 	%f511, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f512, %f187, %f511;
	mov.f32 	%f513, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p58, %f512, %f513;
	@!%p58 bra 	$Lt_123_282370;
	// Step 1: aA is (near) zero -> pass B through.
	.loc	22	608	0
	mov.f32 	%f514, %f61;
	mov.f32 	%f515, %f62;
	mov.f32 	%f516, %f63;
	mov.f32 	%f517, %f64;
	bra.uni 	$Lt_123_283138;
$Lt_123_282370:
	// Step 2: %f64 is (near) zero -> A's first three components with aA.
	mov.f32 	%f518, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f519, %f64, %f518;
	mov.f32 	%f520, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p59, %f519, %f520;
	@!%p59 bra 	$Lt_123_282882;
	mov.f32 	%f514, %f57;
	mov.f32 	%f515, %f58;
	mov.f32 	%f516, %f59;
	mov.f32 	%f517, %f187;
	bra.uni 	$Lt_123_283138;
$Lt_123_282882:
	// Step 3: pick the coefficient table from %r13.
	mov.u32 	%r120, 720;
	setp.gt.s32 	%p60, %r13, %r120;
	@%p60 bra 	$Lt_123_283394;
	// --- %r13 <= 720: kRGB32f_To_601YPbPr coefficients ---
	.loc	22	555	0
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	// lumaA (%f527) = sat(k[+8]*%f57 + k[+0]*%f59 + k[+4]*%f58)
	mul.ftz.f32 	%f524, %f58, %f523;
	fma.rn.ftz.f32 	%f525, %f522, %f59, %f524;
	fma.rn.ftz.f32 	%f526, %f521, %f57, %f525;
	cvt.ftz.sat.f32.f32 	%f527, %f526;
	// lumaB (%f531) = the same weighted sum over %f61 / %f62 / %f63
	mul.ftz.f32 	%f528, %f62, %f523;
	fma.rn.ftz.f32 	%f529, %f522, %f63, %f528;
	fma.rn.ftz.f32 	%f530, %f521, %f61, %f529;
	cvt.ftz.sat.f32.f32 	%f531, %f530;
	// If lumaA is not below lumaB, the result is B.
	setp.lt.ftz.f32 	%p61, %f527, %f531;
	@!%p61 bra 	$Lt_123_237570;
	.loc	22	468	0
	// aOut (%f190) = aA + %f64 - aA * %f64; %f533 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f532, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f532;
	mov.f32 	%f533, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f534, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f535, %f190, %f534;
	mov.f32 	%f536, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p62, %f535, %f536;
	@!%p62 bra 	$Lt_123_283906;
	mov.f32 	%f537, 0f00000000;   	// 0
	mov.f32 	%f538, 0f00000000;   	// 0
	mov.f32 	%f539, 0f00000000;   	// 0
	mov.f32 	%f533, 0f00000000;   	// 0
	bra.uni 	$Lt_123_283650;
$Lt_123_283906:
	// Shared factors: %f199 = 1 - %f64, %f200 = approx 1/aOut,
	// %f201 = aA/aOut, %f204 = 1 - aA/aOut.
	mov.f32 	%f540, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f540, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f541, 0f3f800000;   	// 1
	mul.ftz.f32 	%f542, %f187, %f200;
	sub.ftz.f32 	%f204, %f541, %f542;
	// Per component the blend term is A itself:
	//   out = B*%f204 + %f201*(A*%f64 + A*%f199)
	mul.ftz.f32 	%f543, %f199, %f57;
	fma.rn.ftz.f32 	%f544, %f57, %f64, %f543;
	mul.ftz.f32 	%f545, %f201, %f544;
	fma.rn.ftz.f32 	%f539, %f61, %f204, %f545;
	mul.ftz.f32 	%f546, %f199, %f58;
	fma.rn.ftz.f32 	%f547, %f58, %f64, %f546;
	mul.ftz.f32 	%f548, %f201, %f547;
	fma.rn.ftz.f32 	%f538, %f62, %f204, %f548;
	mul.ftz.f32 	%f549, %f199, %f59;
	fma.rn.ftz.f32 	%f550, %f59, %f64, %f549;
	mul.ftz.f32 	%f551, %f201, %f550;
	fma.rn.ftz.f32 	%f537, %f63, %f204, %f551;
$Lt_123_283650:
	.loc	22	557	0
	mov.f32 	%f552, %f539;
	mov.f32 	%f553, %f538;
	mov.f32 	%f554, %f537;
	mov.f32 	%f555, %f533;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_300_54;
$Lt_123_237570:
	// lumaA >= lumaB: keep B.
	.loc	22	561	0
	mov.f32 	%f552, %f61;
	mov.f32 	%f553, %f62;
	mov.f32 	%f554, %f63;
	mov.f32 	%f555, %f64;
$LDWendi__Z10GetLuma6018PixelRGB_300_54:
	.loc	22	608	0
	mov.f32 	%f514, %f552;
	mov.f32 	%f515, %f553;
	mov.f32 	%f516, %f554;
	mov.f32 	%f517, %f555;
	bra.uni 	$Lt_123_283138;
$Lt_123_283394:
	// --- %r13 > 720: kRGB32f_To_709YPbPr coefficients; same steps as the
	// 601 path above, with its own temporaries ---
	.loc	22	569	0
	ld.const.f32 	%f556, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f557, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f558, [kRGB32f_To_709YPbPr+4];
	// lumaA (%f562) = sat(k[+8]*%f57 + k[+0]*%f59 + k[+4]*%f58)
	mul.ftz.f32 	%f559, %f58, %f558;
	fma.rn.ftz.f32 	%f560, %f557, %f59, %f559;
	fma.rn.ftz.f32 	%f561, %f556, %f57, %f560;
	cvt.ftz.sat.f32.f32 	%f562, %f561;
	// lumaB (%f566) = the same weighted sum over %f61 / %f62 / %f63
	mul.ftz.f32 	%f563, %f62, %f558;
	fma.rn.ftz.f32 	%f564, %f557, %f63, %f563;
	fma.rn.ftz.f32 	%f565, %f556, %f61, %f564;
	cvt.ftz.sat.f32.f32 	%f566, %f565;
	// If lumaA is not below lumaB, the result is B.
	setp.lt.ftz.f32 	%p63, %f562, %f566;
	@!%p63 bra 	$Lt_123_238082;
	.loc	22	468	0
	// aOut (%f190) = aA + %f64 - aA * %f64; %f568 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f567, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f567;
	mov.f32 	%f568, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f569, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f570, %f190, %f569;
	mov.f32 	%f571, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p64, %f570, %f571;
	@!%p64 bra 	$Lt_123_284418;
	mov.f32 	%f572, 0f00000000;   	// 0
	mov.f32 	%f573, 0f00000000;   	// 0
	mov.f32 	%f574, 0f00000000;   	// 0
	mov.f32 	%f568, 0f00000000;   	// 0
	bra.uni 	$Lt_123_284162;
$Lt_123_284418:
	// Shared factors: %f199 = 1 - %f64, %f200 = approx 1/aOut,
	// %f201 = aA/aOut, %f204 = 1 - aA/aOut.
	mov.f32 	%f575, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f575, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f576, 0f3f800000;   	// 1
	mul.ftz.f32 	%f577, %f187, %f200;
	sub.ftz.f32 	%f204, %f576, %f577;
	// Per component: out = B*%f204 + %f201*(A*%f64 + A*%f199)
	mul.ftz.f32 	%f578, %f199, %f57;
	fma.rn.ftz.f32 	%f579, %f57, %f64, %f578;
	mul.ftz.f32 	%f580, %f201, %f579;
	fma.rn.ftz.f32 	%f574, %f61, %f204, %f580;
	mul.ftz.f32 	%f581, %f199, %f58;
	fma.rn.ftz.f32 	%f582, %f58, %f64, %f581;
	mul.ftz.f32 	%f583, %f201, %f582;
	fma.rn.ftz.f32 	%f573, %f62, %f204, %f583;
	mul.ftz.f32 	%f584, %f199, %f59;
	fma.rn.ftz.f32 	%f585, %f59, %f64, %f584;
	mul.ftz.f32 	%f586, %f201, %f585;
	fma.rn.ftz.f32 	%f572, %f63, %f204, %f586;
$Lt_123_284162:
	.loc	22	571	0
	mov.f32 	%f587, %f574;
	mov.f32 	%f588, %f573;
	mov.f32 	%f589, %f572;
	mov.f32 	%f590, %f568;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_300_52;
$Lt_123_238082:
	// lumaA >= lumaB: keep B.
	.loc	22	575	0
	mov.f32 	%f587, %f61;
	mov.f32 	%f588, %f62;
	mov.f32 	%f589, %f63;
	mov.f32 	%f590, %f64;
$LDWendi__Z10GetLuma7098PixelRGB_300_52:
	.loc	22	608	0
	mov.f32 	%f514, %f587;
	mov.f32 	%f515, %f588;
	mov.f32 	%f516, %f589;
	mov.f32 	%f517, %f590;
$Lt_123_283138:
$Lt_123_282626:
$Lt_123_282114:
	// Every path joins here: store (%f514..%f517) into %f57..%f60 and
	// leave through the common exit label.
	.loc	6	198	0
	mov.f32 	%f57, %f514;
	mov.f32 	%f58, %f515;
	mov.f32 	%f59, %f516;
	mov.f32 	%f60, %f517;
	bra.uni 	$Lt_123_315906;
$Lt_123_2562:
	// Arm for %r35 == 2. Operates on two 4-component values:
	//   A = (%f57, %f58, %f59, %f60)   B = (%f61, %f62, %f63, %f64)
	// and writes its result back into %f57..%f60.
	// Per-channel blend term in this arm:
	//   clamp(B, 0, 1) / (1 - clamp(A, 0, 0.99999)), clamped to [0, 1].
	// The 0.99999 upper bound on A keeps the divisor above zero.
	// NOTE(review): %f60 / %f64 are used like alpha values - confirm against
	// the source at .loc 22 line 475.
	.loc	22	475	0
	// aA (%f187) = inAlphaGain * %f60
	ld.param.f32 	%f591, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f591, %f60;
	// aOut (%f190) = aA + %f64 - aA * %f64; %f593 carries it to the output.
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f592, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f592;
	mov.f32 	%f593, %f190;
	// If aOut - 8e-6 <= 0 the result is all zeros and the rcp is skipped.
	mov.f32 	%f594, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f595, %f190, %f594;
	mov.f32 	%f596, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p65, %f595, %f596;
	@!%p65 bra 	$Lt_123_284930;
	mov.f32 	%f597, 0f00000000;   	// 0
	mov.f32 	%f598, 0f00000000;   	// 0
	mov.f32 	%f599, 0f00000000;   	// 0
	mov.f32 	%f593, 0f00000000;   	// 0
	bra.uni 	$Lt_123_284674;
$Lt_123_284930:
	// Shared factors:
	//   %f199 = 1 - %f64
	//   %f200 = approx 1 / aOut
	//   %f201 = aA / aOut           (weight of the blended term)
	//   %f204 = 1 - aA / aOut       (weight of B)
	mov.f32 	%f600, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f600, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f601, 0f3f800000;   	// 1
	mul.ftz.f32 	%f602, %f187, %f200;
	sub.ftz.f32 	%f204, %f601, %f602;
	// Component 0:
	//   %f606 = min(max(%f61, 0), 1)
	//   %f612 = 1 - min(max(%f57, 0), 0.99999)
	//   t     = min(max(%f606 / %f612, 0), 1)
	//   %f599 = %f61*%f204 + %f201*(%f57*%f199 + %f64*t)
	mov.f32 	%f603, 0f00000000;   	// 0
	max.ftz.f32 	%f604, %f61, %f603;
	mov.f32 	%f605, 0f3f800000;   	// 1
	min.ftz.f32 	%f606, %f604, %f605;
	mov.f32 	%f607, 0f3f800000;   	// 1
	mov.f32 	%f608, 0f00000000;   	// 0
	max.ftz.f32 	%f609, %f57, %f608;
	mov.f32 	%f610, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f611, %f609, %f610;
	sub.ftz.f32 	%f612, %f607, %f611;
	div.approx.ftz.f32 	%f613, %f606, %f612;
	mov.f32 	%f614, 0f00000000;   	// 0
	max.ftz.f32 	%f615, %f613, %f614;
	mov.f32 	%f616, 0f3f800000;   	// 1
	min.ftz.f32 	%f617, %f615, %f616;
	mul.ftz.f32 	%f618, %f64, %f617;
	fma.rn.ftz.f32 	%f619, %f57, %f199, %f618;
	mul.ftz.f32 	%f620, %f201, %f619;
	fma.rn.ftz.f32 	%f599, %f61, %f204, %f620;
	// Component 1: same formula on %f58 / %f62 -> %f598
	mov.f32 	%f621, 0f00000000;   	// 0
	max.ftz.f32 	%f622, %f62, %f621;
	mov.f32 	%f623, 0f3f800000;   	// 1
	min.ftz.f32 	%f624, %f622, %f623;
	mov.f32 	%f625, 0f3f800000;   	// 1
	mov.f32 	%f626, 0f00000000;   	// 0
	max.ftz.f32 	%f627, %f58, %f626;
	mov.f32 	%f628, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f629, %f627, %f628;
	sub.ftz.f32 	%f630, %f625, %f629;
	div.approx.ftz.f32 	%f631, %f624, %f630;
	mov.f32 	%f632, 0f00000000;   	// 0
	max.ftz.f32 	%f633, %f631, %f632;
	mov.f32 	%f634, 0f3f800000;   	// 1
	min.ftz.f32 	%f635, %f633, %f634;
	mul.ftz.f32 	%f636, %f64, %f635;
	fma.rn.ftz.f32 	%f637, %f58, %f199, %f636;
	mul.ftz.f32 	%f638, %f201, %f637;
	fma.rn.ftz.f32 	%f598, %f62, %f204, %f638;
	// Component 2: same formula on %f59 / %f63 -> %f597
	mov.f32 	%f639, 0f00000000;   	// 0
	max.ftz.f32 	%f640, %f63, %f639;
	mov.f32 	%f641, 0f3f800000;   	// 1
	min.ftz.f32 	%f642, %f640, %f641;
	mov.f32 	%f643, 0f3f800000;   	// 1
	mov.f32 	%f644, 0f00000000;   	// 0
	max.ftz.f32 	%f645, %f59, %f644;
	mov.f32 	%f646, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f647, %f645, %f646;
	sub.ftz.f32 	%f648, %f643, %f647;
	div.approx.ftz.f32 	%f649, %f642, %f648;
	mov.f32 	%f650, 0f00000000;   	// 0
	max.ftz.f32 	%f651, %f649, %f650;
	mov.f32 	%f652, 0f3f800000;   	// 1
	min.ftz.f32 	%f653, %f651, %f652;
	mul.ftz.f32 	%f654, %f64, %f653;
	fma.rn.ftz.f32 	%f655, %f59, %f199, %f654;
	mul.ftz.f32 	%f656, %f201, %f655;
	fma.rn.ftz.f32 	%f597, %f63, %f204, %f656;
$Lt_123_284674:
	// Both paths join here: store the result into %f57..%f60 and leave
	// through the common exit label.
	.loc	6	199	0
	mov.f32 	%f57, %f599;
	mov.f32 	%f58, %f598;
	mov.f32 	%f59, %f597;
	mov.f32 	%f60, %f593;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_2818: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64) using a clamped-sum blend term.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel (A_c, B_c):
//     blend = min(max(A_c + B_c, 0), 1)
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// The results are copied back into %f57..%f60 and control jumps to
// $Lt_123_315906, which lies outside this excerpt.
// NOTE(review): %f60 / %f64 look like alpha values (see the inAlphaGain
// parameter name); the colour-channel order is not visible here -- confirm.
$Lt_123_2818:
	.loc	22	476	0
	ld.param.f32 	%f657, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f657, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f658, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f658;
	// %f659 holds the 4th output; it defaults to outA and is zeroed below.
	mov.f32 	%f659, %f190;
	mov.f32 	%f660, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f661, %f190, %f660;
	mov.f32 	%f662, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p66, %f661, %f662;
	@!%p66 bra 	$Lt_123_285442;
	// outA is (almost) zero: skip the reciprocal and emit zeros.
	mov.f32 	%f663, 0f00000000;   	// 0
	mov.f32 	%f664, 0f00000000;   	// 0
	mov.f32 	%f665, 0f00000000;   	// 0
	mov.f32 	%f659, 0f00000000;   	// 0
	bra.uni 	$Lt_123_285186;
$Lt_123_285442:
	// Shared weights: %f199 = 1 - %f64, %f201 = a/outA (approximate
	// reciprocal), %f204 = 1 - a/outA.
	mov.f32 	%f666, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f666, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f667, 0f3f800000;   	// 1
	mul.ftz.f32 	%f668, %f187, %f200;
	sub.ftz.f32 	%f204, %f667, %f668;
	// Channel pair (%f57, %f61) -> %f665
	add.ftz.f32 	%f669, %f57, %f61;
	mov.f32 	%f670, 0f00000000;   	// 0
	max.ftz.f32 	%f671, %f669, %f670;
	mov.f32 	%f672, 0f3f800000;   	// 1
	min.ftz.f32 	%f673, %f671, %f672;
	mul.ftz.f32 	%f674, %f64, %f673;
	fma.rn.ftz.f32 	%f675, %f57, %f199, %f674;
	mul.ftz.f32 	%f676, %f201, %f675;
	fma.rn.ftz.f32 	%f665, %f61, %f204, %f676;
	// Channel pair (%f58, %f62) -> %f664
	add.ftz.f32 	%f677, %f58, %f62;
	mov.f32 	%f678, 0f00000000;   	// 0
	max.ftz.f32 	%f679, %f677, %f678;
	mov.f32 	%f680, 0f3f800000;   	// 1
	min.ftz.f32 	%f681, %f679, %f680;
	mul.ftz.f32 	%f682, %f64, %f681;
	fma.rn.ftz.f32 	%f683, %f58, %f199, %f682;
	mul.ftz.f32 	%f684, %f201, %f683;
	fma.rn.ftz.f32 	%f664, %f62, %f204, %f684;
	// Channel pair (%f59, %f63) -> %f663
	add.ftz.f32 	%f685, %f59, %f63;
	mov.f32 	%f686, 0f00000000;   	// 0
	max.ftz.f32 	%f687, %f685, %f686;
	mov.f32 	%f688, 0f3f800000;   	// 1
	min.ftz.f32 	%f689, %f687, %f688;
	mul.ftz.f32 	%f690, %f64, %f689;
	fma.rn.ftz.f32 	%f691, %f59, %f199, %f690;
	mul.ftz.f32 	%f692, %f201, %f691;
	fma.rn.ftz.f32 	%f663, %f63, %f204, %f692;
$Lt_123_285186:
	// Join point: write the result back into pixel A's registers.
	.loc	6	200	0
	mov.f32 	%f57, %f665;
	mov.f32 	%f58, %f664;
	mov.f32 	%f59, %f663;
	mov.f32 	%f60, %f659;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_3074: choose between pixel A (%f57,%f58,%f59; %f60) and
// pixel B (%f61,%f62,%f63; %f64) by comparing saturated luma values.
//   a = inAlphaGain * %f60                            (%f187)
//   if a    - 8e-6 <= 0 : result = B unchanged
//   elif %f64 - 8e-6 <= 0 : result = (A colour, a)
//   else:
//     coefficients come from kRGB32f_To_601YPbPr when %r13 <= 720 (signed),
//     otherwise from kRGB32f_To_709YPbPr;
//     luma(P) = sat(c[+8]*P0 + c[+0]*P2 + c[+4]*P1)
//     if luma(A) > luma(B): composite A with B (formula below)
//     else                : result = B unchanged
//   composite: outA = a + %f64 - a*%f64; zeros if outA - 8e-6 <= 0, else
//     out_c = B_c*(1 - a/outA) + (a/outA)*(A_c*(1 - %f64) + A_c*%f64)
// The result lands in %f697..%f700, is copied to %f57..%f60, and control
// jumps to $Lt_123_315906 (outside this excerpt).
// NOTE(review): %r13 is defined outside this excerpt; presumably a frame
// dimension used to pick SD vs HD coefficients -- confirm.
$Lt_123_3074:
	.loc	6	201	0
	ld.param.f32 	%f693, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f693, %f60;
	mov.f32 	%f694, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f695, %f187, %f694;
	mov.f32 	%f696, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p67, %f695, %f696;
	@!%p67 bra 	$Lt_123_285954;
	// Scaled %f60 is (almost) zero: pass pixel B through.
	.loc	22	609	0
	mov.f32 	%f697, %f61;
	mov.f32 	%f698, %f62;
	mov.f32 	%f699, %f63;
	mov.f32 	%f700, %f64;
	bra.uni 	$Lt_123_286722;
$Lt_123_285954:
	mov.f32 	%f701, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f702, %f64, %f701;
	mov.f32 	%f703, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p68, %f702, %f703;
	@!%p68 bra 	$Lt_123_286466;
	// %f64 is (almost) zero: pass pixel A through with the scaled 4th value.
	mov.f32 	%f697, %f57;
	mov.f32 	%f698, %f58;
	mov.f32 	%f699, %f59;
	mov.f32 	%f700, %f187;
	bra.uni 	$Lt_123_286722;
$Lt_123_286466:
	// Select the coefficient table: %r13 > 720 takes the 709 path.
	mov.u32 	%r121, 720;
	setp.gt.s32 	%p69, %r13, %r121;
	@%p69 bra 	$Lt_123_286978;
	// --- 601 path: %f707 = sat(luma(A)), %f711 = sat(luma(B)) ---
	.loc	22	584	0
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f704, %f58, %f523;
	fma.rn.ftz.f32 	%f705, %f522, %f59, %f704;
	fma.rn.ftz.f32 	%f706, %f521, %f57, %f705;
	cvt.ftz.sat.f32.f32 	%f707, %f706;
	mul.ftz.f32 	%f708, %f62, %f523;
	fma.rn.ftz.f32 	%f709, %f522, %f63, %f708;
	fma.rn.ftz.f32 	%f710, %f521, %f61, %f709;
	cvt.ftz.sat.f32.f32 	%f711, %f710;
	setp.gt.ftz.f32 	%p70, %f707, %f711;
	@!%p70 bra 	$Lt_123_239874;
	// luma(A) > luma(B): composite with A's own colour as the blend term.
	.loc	22	468	0
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f712, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f712;
	mov.f32 	%f713, %f190;
	mov.f32 	%f714, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f715, %f190, %f714;
	mov.f32 	%f716, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p71, %f715, %f716;
	@!%p71 bra 	$Lt_123_287490;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f717, 0f00000000;   	// 0
	mov.f32 	%f718, 0f00000000;   	// 0
	mov.f32 	%f719, 0f00000000;   	// 0
	mov.f32 	%f713, 0f00000000;   	// 0
	bra.uni 	$Lt_123_287234;
$Lt_123_287490:
	// %f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA.
	mov.f32 	%f720, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f720, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f721, 0f3f800000;   	// 1
	mul.ftz.f32 	%f722, %f187, %f200;
	sub.ftz.f32 	%f204, %f721, %f722;
	mul.ftz.f32 	%f723, %f199, %f57;
	fma.rn.ftz.f32 	%f724, %f57, %f64, %f723;
	mul.ftz.f32 	%f725, %f201, %f724;
	fma.rn.ftz.f32 	%f719, %f61, %f204, %f725;
	mul.ftz.f32 	%f726, %f199, %f58;
	fma.rn.ftz.f32 	%f727, %f58, %f64, %f726;
	mul.ftz.f32 	%f728, %f201, %f727;
	fma.rn.ftz.f32 	%f718, %f62, %f204, %f728;
	mul.ftz.f32 	%f729, %f199, %f59;
	fma.rn.ftz.f32 	%f730, %f59, %f64, %f729;
	mul.ftz.f32 	%f731, %f201, %f730;
	fma.rn.ftz.f32 	%f717, %f63, %f204, %f731;
$Lt_123_287234:
	.loc	22	586	0
	mov.f32 	%f732, %f719;
	mov.f32 	%f733, %f718;
	mov.f32 	%f734, %f717;
	mov.f32 	%f735, %f713;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_300_50;
$Lt_123_239874:
	// luma(A) <= luma(B): keep pixel B.
	.loc	22	590	0
	mov.f32 	%f732, %f61;
	mov.f32 	%f733, %f62;
	mov.f32 	%f734, %f63;
	mov.f32 	%f735, %f64;
$LDWendi__Z10GetLuma6018PixelRGB_300_50:
	.loc	22	609	0
	mov.f32 	%f697, %f732;
	mov.f32 	%f698, %f733;
	mov.f32 	%f699, %f734;
	mov.f32 	%f700, %f735;
	bra.uni 	$Lt_123_286722;
$Lt_123_286978:
	// --- 709 path: same structure as the 601 path, different table ---
	.loc	22	598	0
	ld.const.f32 	%f736, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f737, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f738, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f739, %f58, %f738;
	fma.rn.ftz.f32 	%f740, %f737, %f59, %f739;
	fma.rn.ftz.f32 	%f741, %f736, %f57, %f740;
	cvt.ftz.sat.f32.f32 	%f742, %f741;
	mul.ftz.f32 	%f743, %f62, %f738;
	fma.rn.ftz.f32 	%f744, %f737, %f63, %f743;
	fma.rn.ftz.f32 	%f745, %f736, %f61, %f744;
	cvt.ftz.sat.f32.f32 	%f746, %f745;
	setp.gt.ftz.f32 	%p72, %f742, %f746;
	@!%p72 bra 	$Lt_123_240386;
	// luma(A) > luma(B): composite with A's own colour as the blend term.
	.loc	22	468	0
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f747, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f747;
	mov.f32 	%f748, %f190;
	mov.f32 	%f749, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f750, %f190, %f749;
	mov.f32 	%f751, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p73, %f750, %f751;
	@!%p73 bra 	$Lt_123_288002;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f752, 0f00000000;   	// 0
	mov.f32 	%f753, 0f00000000;   	// 0
	mov.f32 	%f754, 0f00000000;   	// 0
	mov.f32 	%f748, 0f00000000;   	// 0
	bra.uni 	$Lt_123_287746;
$Lt_123_288002:
	// %f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA.
	mov.f32 	%f755, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f755, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f756, 0f3f800000;   	// 1
	mul.ftz.f32 	%f757, %f187, %f200;
	sub.ftz.f32 	%f204, %f756, %f757;
	mul.ftz.f32 	%f758, %f199, %f57;
	fma.rn.ftz.f32 	%f759, %f57, %f64, %f758;
	mul.ftz.f32 	%f760, %f201, %f759;
	fma.rn.ftz.f32 	%f754, %f61, %f204, %f760;
	mul.ftz.f32 	%f761, %f199, %f58;
	fma.rn.ftz.f32 	%f762, %f58, %f64, %f761;
	mul.ftz.f32 	%f763, %f201, %f762;
	fma.rn.ftz.f32 	%f753, %f62, %f204, %f763;
	mul.ftz.f32 	%f764, %f199, %f59;
	fma.rn.ftz.f32 	%f765, %f59, %f64, %f764;
	mul.ftz.f32 	%f766, %f201, %f765;
	fma.rn.ftz.f32 	%f752, %f63, %f204, %f766;
$Lt_123_287746:
	.loc	22	600	0
	mov.f32 	%f767, %f754;
	mov.f32 	%f768, %f753;
	mov.f32 	%f769, %f752;
	mov.f32 	%f770, %f748;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_300_48;
$Lt_123_240386:
	// luma(A) <= luma(B): keep pixel B.
	.loc	22	604	0
	mov.f32 	%f767, %f61;
	mov.f32 	%f768, %f62;
	mov.f32 	%f769, %f63;
	mov.f32 	%f770, %f64;
$LDWendi__Z10GetLuma7098PixelRGB_300_48:
	.loc	22	609	0
	mov.f32 	%f697, %f767;
	mov.f32 	%f698, %f768;
	mov.f32 	%f699, %f769;
	mov.f32 	%f700, %f770;
$Lt_123_286722:
$Lt_123_286210:
$Lt_123_285698:
	// Common exit for every path above: write the result into %f57..%f60.
	.loc	6	201	0
	mov.f32 	%f57, %f697;
	mov.f32 	%f58, %f698;
	mov.f32 	%f59, %f699;
	mov.f32 	%f60, %f700;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_3330: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64); the blend term switches on B's channel.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Ac = clamp(A_c,0,1), Bc = clamp(B_c,0,1):
//     blend = (Bc <= 0.5) ? Bc * (2*Ac) : 1 - 2*(1 - Ac)*(1 - Bc)
//     blend = clamp(blend, 0, 1)
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// The results are copied back into %f57..%f60 and control jumps to
// $Lt_123_315906 (outside this excerpt).
$Lt_123_3330:
	.loc	22	477	0
	ld.param.f32 	%f771, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f771, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f772, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f772;
	mov.f32 	%f773, %f190;
	mov.f32 	%f774, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f775, %f190, %f774;
	mov.f32 	%f776, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p74, %f775, %f776;
	@!%p74 bra 	$Lt_123_288514;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f777, 0f00000000;   	// 0
	mov.f32 	%f778, 0f00000000;   	// 0
	mov.f32 	%f779, 0f00000000;   	// 0
	mov.f32 	%f773, 0f00000000;   	// 0
	bra.uni 	$Lt_123_288258;
$Lt_123_288514:
	// Channel pair (%f57, %f61): %f785 = Bc, %f787 = Ac, blend -> %f790
	.loc	22	373	0
	mov.f32 	%f780, 0f00000000;   	// 0
	max.ftz.f32 	%f781, %f61, %f780;
	mov.f32 	%f782, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f57, %f782;
	mov.f32 	%f784, 0f3f800000;   	// 1
	min.ftz.f32 	%f785, %f781, %f784;
	mov.f32 	%f786, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f786;
	mov.f32 	%f788, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p75, %f785, %f788;
	@!%p75 bra 	$Lt_123_289026;
	add.ftz.f32 	%f789, %f787, %f787;
	mul.ftz.f32 	%f790, %f785, %f789;
	bra.uni 	$Lt_123_288770;
$Lt_123_289026:
	mov.f32 	%f791, 0f3f800000;   	// 1
	sub.ftz.f32 	%f792, %f791, %f787;
	mov.f32 	%f793, 0f3f800000;   	// 1
	add.ftz.f32 	%f794, %f792, %f792;
	mov.f32 	%f795, 0f3f800000;   	// 1
	sub.ftz.f32 	%f796, %f795, %f785;
	mul.ftz.f32 	%f797, %f794, %f796;
	sub.ftz.f32 	%f790, %f793, %f797;
$Lt_123_288770:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA),
	// then the first channel's composite -> %f779.
	.loc	22	477	0
	mov.f32 	%f798, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f798, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f799, 0f3f800000;   	// 1
	mul.ftz.f32 	%f800, %f187, %f200;
	sub.ftz.f32 	%f204, %f799, %f800;
	mov.f32 	%f801, 0f00000000;   	// 0
	max.ftz.f32 	%f802, %f790, %f801;
	mov.f32 	%f803, 0f3f800000;   	// 1
	min.ftz.f32 	%f804, %f802, %f803;
	mul.ftz.f32 	%f805, %f64, %f804;
	fma.rn.ftz.f32 	%f806, %f57, %f199, %f805;
	mul.ftz.f32 	%f807, %f201, %f806;
	fma.rn.ftz.f32 	%f779, %f61, %f204, %f807;
	// Channel pair (%f58, %f62): %f813 = Bc, %f815 = Ac, blend -> %f818
	.loc	22	373	0
	mov.f32 	%f808, 0f00000000;   	// 0
	max.ftz.f32 	%f809, %f62, %f808;
	mov.f32 	%f810, 0f00000000;   	// 0
	max.ftz.f32 	%f811, %f58, %f810;
	mov.f32 	%f812, 0f3f800000;   	// 1
	min.ftz.f32 	%f813, %f809, %f812;
	mov.f32 	%f814, 0f3f800000;   	// 1
	min.ftz.f32 	%f815, %f811, %f814;
	mov.f32 	%f816, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p76, %f813, %f816;
	@!%p76 bra 	$Lt_123_289538;
	add.ftz.f32 	%f817, %f815, %f815;
	mul.ftz.f32 	%f818, %f813, %f817;
	bra.uni 	$Lt_123_289282;
$Lt_123_289538:
	mov.f32 	%f819, 0f3f800000;   	// 1
	sub.ftz.f32 	%f820, %f819, %f815;
	mov.f32 	%f821, 0f3f800000;   	// 1
	add.ftz.f32 	%f822, %f820, %f820;
	mov.f32 	%f823, 0f3f800000;   	// 1
	sub.ftz.f32 	%f824, %f823, %f813;
	mul.ftz.f32 	%f825, %f822, %f824;
	sub.ftz.f32 	%f818, %f821, %f825;
$Lt_123_289282:
	// Second channel's composite -> %f778
	.loc	22	477	0
	mov.f32 	%f826, 0f00000000;   	// 0
	max.ftz.f32 	%f827, %f818, %f826;
	mov.f32 	%f828, 0f3f800000;   	// 1
	min.ftz.f32 	%f829, %f827, %f828;
	mul.ftz.f32 	%f830, %f64, %f829;
	fma.rn.ftz.f32 	%f831, %f58, %f199, %f830;
	mul.ftz.f32 	%f832, %f201, %f831;
	fma.rn.ftz.f32 	%f778, %f62, %f204, %f832;
	// Channel pair (%f59, %f63): %f838 = Bc, %f840 = Ac, blend -> %f843
	.loc	22	373	0
	mov.f32 	%f833, 0f00000000;   	// 0
	max.ftz.f32 	%f834, %f63, %f833;
	mov.f32 	%f835, 0f00000000;   	// 0
	max.ftz.f32 	%f836, %f59, %f835;
	mov.f32 	%f837, 0f3f800000;   	// 1
	min.ftz.f32 	%f838, %f834, %f837;
	mov.f32 	%f839, 0f3f800000;   	// 1
	min.ftz.f32 	%f840, %f836, %f839;
	mov.f32 	%f841, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p77, %f838, %f841;
	@!%p77 bra 	$Lt_123_290050;
	add.ftz.f32 	%f842, %f840, %f840;
	mul.ftz.f32 	%f843, %f838, %f842;
	bra.uni 	$Lt_123_289794;
$Lt_123_290050:
	mov.f32 	%f844, 0f3f800000;   	// 1
	sub.ftz.f32 	%f845, %f844, %f840;
	mov.f32 	%f846, 0f3f800000;   	// 1
	add.ftz.f32 	%f847, %f845, %f845;
	mov.f32 	%f848, 0f3f800000;   	// 1
	sub.ftz.f32 	%f849, %f848, %f838;
	mul.ftz.f32 	%f850, %f847, %f849;
	sub.ftz.f32 	%f843, %f846, %f850;
$Lt_123_289794:
	// Third channel's composite -> %f777
	.loc	22	477	0
	mov.f32 	%f851, 0f00000000;   	// 0
	max.ftz.f32 	%f852, %f843, %f851;
	mov.f32 	%f853, 0f3f800000;   	// 1
	min.ftz.f32 	%f854, %f852, %f853;
	mul.ftz.f32 	%f855, %f64, %f854;
	fma.rn.ftz.f32 	%f856, %f59, %f199, %f855;
	mul.ftz.f32 	%f857, %f201, %f856;
	fma.rn.ftz.f32 	%f777, %f63, %f204, %f857;
$Lt_123_288258:
	// Join point: write the result back into pixel A's registers.
	.loc	6	202	0
	mov.f32 	%f57, %f779;
	mov.f32 	%f58, %f778;
	mov.f32 	%f59, %f777;
	mov.f32 	%f60, %f773;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_3586: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64); the blend term interpolates B toward
// B*B or sqrt(B) depending on A.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Ac = clamp(A_c,0,1), Bc = clamp(B_c,0,1):
//     blend = (Ac <= 0.5) ? Bc + (2*Ac - 1) * (Bc - Bc*Bc)
//                         : Bc + (2*Ac - 1) * (sqrt(Bc) - Bc)
//     blend = clamp(blend, 0, 1)
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// sqrt and the reciprocal use the .approx variants. The results are copied
// back into %f57..%f60 and control jumps to $Lt_123_315906 (outside this
// excerpt).
$Lt_123_3586:
	.loc	22	478	0
	ld.param.f32 	%f858, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f858, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f859, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f859;
	mov.f32 	%f860, %f190;
	mov.f32 	%f861, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f862, %f190, %f861;
	mov.f32 	%f863, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p78, %f862, %f863;
	@!%p78 bra 	$Lt_123_290562;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f864, 0f00000000;   	// 0
	mov.f32 	%f865, 0f00000000;   	// 0
	mov.f32 	%f866, 0f00000000;   	// 0
	mov.f32 	%f860, 0f00000000;   	// 0
	bra.uni 	$Lt_123_290306;
$Lt_123_290562:
	// Channel pair (%f57, %f61): %f785 = Bc, %f787 = Ac, %f873 = 2*Ac - 1,
	// blend -> %f877
	.loc	22	380	0
	mov.f32 	%f867, 0f00000000;   	// 0
	max.ftz.f32 	%f781, %f61, %f867;
	mov.f32 	%f868, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f57, %f868;
	mov.f32 	%f869, 0f3f800000;   	// 1
	min.ftz.f32 	%f785, %f781, %f869;
	mov.f32 	%f870, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f870;
	add.ftz.f32 	%f871, %f787, %f787;
	mov.f32 	%f872, 0fbf800000;   	// -1
	add.ftz.f32 	%f873, %f871, %f872;
	mov.f32 	%f874, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p79, %f787, %f874;
	@!%p79 bra 	$Lt_123_291074;
	mul.ftz.f32 	%f875, %f785, %f785;
	sub.ftz.f32 	%f876, %f785, %f875;
	fma.rn.ftz.f32 	%f877, %f873, %f876, %f785;
	bra.uni 	$Lt_123_290818;
$Lt_123_291074:
	sqrt.approx.ftz.f32 	%f878, %f785;
	sub.ftz.f32 	%f879, %f878, %f785;
	fma.rn.ftz.f32 	%f877, %f873, %f879, %f785;
$Lt_123_290818:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA),
	// then the first channel's composite -> %f866.
	.loc	22	478	0
	mov.f32 	%f880, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f880, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f881, 0f3f800000;   	// 1
	mul.ftz.f32 	%f882, %f187, %f200;
	sub.ftz.f32 	%f204, %f881, %f882;
	mov.f32 	%f883, 0f00000000;   	// 0
	max.ftz.f32 	%f884, %f877, %f883;
	mov.f32 	%f885, 0f3f800000;   	// 1
	min.ftz.f32 	%f886, %f884, %f885;
	mul.ftz.f32 	%f887, %f64, %f886;
	fma.rn.ftz.f32 	%f888, %f57, %f199, %f887;
	mul.ftz.f32 	%f889, %f201, %f888;
	fma.rn.ftz.f32 	%f866, %f61, %f204, %f889;
	// Channel pair (%f58, %f62): %f813 = Bc, %f815 = Ac, blend -> %f900
	.loc	22	380	0
	mov.f32 	%f890, 0f00000000;   	// 0
	max.ftz.f32 	%f809, %f62, %f890;
	mov.f32 	%f891, 0f00000000;   	// 0
	max.ftz.f32 	%f811, %f58, %f891;
	mov.f32 	%f892, 0f3f800000;   	// 1
	min.ftz.f32 	%f813, %f809, %f892;
	mov.f32 	%f893, 0f3f800000;   	// 1
	min.ftz.f32 	%f815, %f811, %f893;
	add.ftz.f32 	%f894, %f815, %f815;
	mov.f32 	%f895, 0fbf800000;   	// -1
	add.ftz.f32 	%f896, %f894, %f895;
	mov.f32 	%f897, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p80, %f815, %f897;
	@!%p80 bra 	$Lt_123_291586;
	mul.ftz.f32 	%f898, %f813, %f813;
	sub.ftz.f32 	%f899, %f813, %f898;
	fma.rn.ftz.f32 	%f900, %f896, %f899, %f813;
	bra.uni 	$Lt_123_291330;
$Lt_123_291586:
	sqrt.approx.ftz.f32 	%f901, %f813;
	sub.ftz.f32 	%f902, %f901, %f813;
	fma.rn.ftz.f32 	%f900, %f896, %f902, %f813;
$Lt_123_291330:
	// Second channel's composite -> %f865
	.loc	22	478	0
	mov.f32 	%f903, 0f00000000;   	// 0
	max.ftz.f32 	%f904, %f900, %f903;
	mov.f32 	%f905, 0f3f800000;   	// 1
	min.ftz.f32 	%f906, %f904, %f905;
	mul.ftz.f32 	%f907, %f64, %f906;
	fma.rn.ftz.f32 	%f908, %f58, %f199, %f907;
	mul.ftz.f32 	%f909, %f201, %f908;
	fma.rn.ftz.f32 	%f865, %f62, %f204, %f909;
	// Channel pair (%f59, %f63): %f838 = Bc, %f840 = Ac, blend -> %f920
	.loc	22	380	0
	mov.f32 	%f910, 0f00000000;   	// 0
	max.ftz.f32 	%f834, %f63, %f910;
	mov.f32 	%f911, 0f00000000;   	// 0
	max.ftz.f32 	%f836, %f59, %f911;
	mov.f32 	%f912, 0f3f800000;   	// 1
	min.ftz.f32 	%f838, %f834, %f912;
	mov.f32 	%f913, 0f3f800000;   	// 1
	min.ftz.f32 	%f840, %f836, %f913;
	add.ftz.f32 	%f914, %f840, %f840;
	mov.f32 	%f915, 0fbf800000;   	// -1
	add.ftz.f32 	%f916, %f914, %f915;
	mov.f32 	%f917, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p81, %f840, %f917;
	@!%p81 bra 	$Lt_123_292098;
	mul.ftz.f32 	%f918, %f838, %f838;
	sub.ftz.f32 	%f919, %f838, %f918;
	fma.rn.ftz.f32 	%f920, %f916, %f919, %f838;
	bra.uni 	$Lt_123_291842;
$Lt_123_292098:
	sqrt.approx.ftz.f32 	%f921, %f838;
	sub.ftz.f32 	%f922, %f921, %f838;
	fma.rn.ftz.f32 	%f920, %f916, %f922, %f838;
$Lt_123_291842:
	// Third channel's composite -> %f864
	.loc	22	478	0
	mov.f32 	%f923, 0f00000000;   	// 0
	max.ftz.f32 	%f924, %f920, %f923;
	mov.f32 	%f925, 0f3f800000;   	// 1
	min.ftz.f32 	%f926, %f924, %f925;
	mul.ftz.f32 	%f927, %f64, %f926;
	fma.rn.ftz.f32 	%f928, %f59, %f199, %f927;
	mul.ftz.f32 	%f929, %f201, %f928;
	fma.rn.ftz.f32 	%f864, %f63, %f204, %f929;
$Lt_123_290306:
	// Join point: write the result back into pixel A's registers.
	.loc	6	203	0
	mov.f32 	%f57, %f866;
	mov.f32 	%f58, %f865;
	mov.f32 	%f59, %f864;
	mov.f32 	%f60, %f860;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_3842: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64); the blend term switches on A's channel.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Ac = clamp(A_c,0,1), Bc = clamp(B_c,0,1):
//     blend = (Ac <= 0.5) ? Bc * (2*Ac) : 1 - 2*(1 - Ac)*(1 - Bc)
//     blend = clamp(blend, 0, 1)
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// Same arithmetic as section $Lt_123_3330 except that the 0.5 test is made
// on Ac instead of Bc. The results are copied back into %f57..%f60 and
// control jumps to $Lt_123_315906 (outside this excerpt).
$Lt_123_3842:
	.loc	22	479	0
	ld.param.f32 	%f930, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f930, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f931, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f931;
	mov.f32 	%f932, %f190;
	mov.f32 	%f933, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f934, %f190, %f933;
	mov.f32 	%f935, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p82, %f934, %f935;
	@!%p82 bra 	$Lt_123_292610;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f936, 0f00000000;   	// 0
	mov.f32 	%f937, 0f00000000;   	// 0
	mov.f32 	%f938, 0f00000000;   	// 0
	mov.f32 	%f932, 0f00000000;   	// 0
	bra.uni 	$Lt_123_292354;
$Lt_123_292610:
	// Channel pair (%f57, %f61): %f785 = Bc, %f787 = Ac, blend -> %f945
	.loc	22	386	0
	mov.f32 	%f939, 0f00000000;   	// 0
	max.ftz.f32 	%f781, %f61, %f939;
	mov.f32 	%f940, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f57, %f940;
	mov.f32 	%f941, 0f3f800000;   	// 1
	min.ftz.f32 	%f785, %f781, %f941;
	mov.f32 	%f942, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f942;
	mov.f32 	%f943, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p83, %f787, %f943;
	@!%p83 bra 	$Lt_123_293122;
	add.ftz.f32 	%f944, %f787, %f787;
	mul.ftz.f32 	%f945, %f785, %f944;
	bra.uni 	$Lt_123_292866;
$Lt_123_293122:
	mov.f32 	%f946, 0f3f800000;   	// 1
	sub.ftz.f32 	%f947, %f946, %f787;
	mov.f32 	%f948, 0f3f800000;   	// 1
	add.ftz.f32 	%f949, %f947, %f947;
	mov.f32 	%f950, 0f3f800000;   	// 1
	sub.ftz.f32 	%f951, %f950, %f785;
	mul.ftz.f32 	%f952, %f949, %f951;
	sub.ftz.f32 	%f945, %f948, %f952;
$Lt_123_292866:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA),
	// then the first channel's composite -> %f938.
	.loc	22	479	0
	mov.f32 	%f953, 0f3f800000;   	// 1
	sub.ftz.f32 	%f199, %f953, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f954, 0f3f800000;   	// 1
	mul.ftz.f32 	%f955, %f187, %f200;
	sub.ftz.f32 	%f204, %f954, %f955;
	mov.f32 	%f956, 0f00000000;   	// 0
	max.ftz.f32 	%f957, %f945, %f956;
	mov.f32 	%f958, 0f3f800000;   	// 1
	min.ftz.f32 	%f959, %f957, %f958;
	mul.ftz.f32 	%f960, %f64, %f959;
	fma.rn.ftz.f32 	%f961, %f57, %f199, %f960;
	mul.ftz.f32 	%f962, %f201, %f961;
	fma.rn.ftz.f32 	%f938, %f61, %f204, %f962;
	// Channel pair (%f58, %f62): %f813 = Bc, %f815 = Ac, blend -> %f969
	.loc	22	386	0
	mov.f32 	%f963, 0f00000000;   	// 0
	max.ftz.f32 	%f809, %f62, %f963;
	mov.f32 	%f964, 0f00000000;   	// 0
	max.ftz.f32 	%f811, %f58, %f964;
	mov.f32 	%f965, 0f3f800000;   	// 1
	min.ftz.f32 	%f813, %f809, %f965;
	mov.f32 	%f966, 0f3f800000;   	// 1
	min.ftz.f32 	%f815, %f811, %f966;
	mov.f32 	%f967, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p84, %f815, %f967;
	@!%p84 bra 	$Lt_123_293634;
	add.ftz.f32 	%f968, %f815, %f815;
	mul.ftz.f32 	%f969, %f813, %f968;
	bra.uni 	$Lt_123_293378;
$Lt_123_293634:
	mov.f32 	%f970, 0f3f800000;   	// 1
	sub.ftz.f32 	%f971, %f970, %f815;
	mov.f32 	%f972, 0f3f800000;   	// 1
	add.ftz.f32 	%f973, %f971, %f971;
	mov.f32 	%f974, 0f3f800000;   	// 1
	sub.ftz.f32 	%f975, %f974, %f813;
	mul.ftz.f32 	%f976, %f973, %f975;
	sub.ftz.f32 	%f969, %f972, %f976;
$Lt_123_293378:
	// Second channel's composite -> %f937
	.loc	22	479	0
	mov.f32 	%f977, 0f00000000;   	// 0
	max.ftz.f32 	%f978, %f969, %f977;
	mov.f32 	%f979, 0f3f800000;   	// 1
	min.ftz.f32 	%f980, %f978, %f979;
	mul.ftz.f32 	%f981, %f64, %f980;
	fma.rn.ftz.f32 	%f982, %f58, %f199, %f981;
	mul.ftz.f32 	%f983, %f201, %f982;
	fma.rn.ftz.f32 	%f937, %f62, %f204, %f983;
	// Channel pair (%f59, %f63): %f838 = Bc, %f840 = Ac, blend -> %f990
	.loc	22	386	0
	mov.f32 	%f984, 0f00000000;   	// 0
	max.ftz.f32 	%f834, %f63, %f984;
	mov.f32 	%f985, 0f00000000;   	// 0
	max.ftz.f32 	%f836, %f59, %f985;
	mov.f32 	%f986, 0f3f800000;   	// 1
	min.ftz.f32 	%f838, %f834, %f986;
	mov.f32 	%f987, 0f3f800000;   	// 1
	min.ftz.f32 	%f840, %f836, %f987;
	mov.f32 	%f988, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p85, %f840, %f988;
	@!%p85 bra 	$Lt_123_294146;
	add.ftz.f32 	%f989, %f840, %f840;
	mul.ftz.f32 	%f990, %f838, %f989;
	bra.uni 	$Lt_123_293890;
$Lt_123_294146:
	mov.f32 	%f991, 0f3f800000;   	// 1
	sub.ftz.f32 	%f992, %f991, %f840;
	mov.f32 	%f993, 0f3f800000;   	// 1
	add.ftz.f32 	%f994, %f992, %f992;
	mov.f32 	%f995, 0f3f800000;   	// 1
	sub.ftz.f32 	%f996, %f995, %f838;
	mul.ftz.f32 	%f997, %f994, %f996;
	sub.ftz.f32 	%f990, %f993, %f997;
$Lt_123_293890:
	// Third channel's composite -> %f936
	.loc	22	479	0
	mov.f32 	%f998, 0f00000000;   	// 0
	max.ftz.f32 	%f999, %f990, %f998;
	mov.f32 	%f1000, 0f3f800000;  	// 1
	min.ftz.f32 	%f1001, %f999, %f1000;
	mul.ftz.f32 	%f1002, %f64, %f1001;
	fma.rn.ftz.f32 	%f1003, %f59, %f199, %f1002;
	mul.ftz.f32 	%f1004, %f201, %f1003;
	fma.rn.ftz.f32 	%f936, %f63, %f204, %f1004;
$Lt_123_292354:
	// Join point: write the result back into pixel A's registers.
	.loc	6	204	0
	mov.f32 	%f57, %f938;
	mov.f32 	%f58, %f937;
	mov.f32 	%f59, %f936;
	mov.f32 	%f60, %f932;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_4098: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64) using a division-based blend term.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Bc = clamp(B_c, 0, 1) and
//                          Ae = min(max(A_c, 1e-6), 0.999999):
//     blend = (Ae <= 0.5) ? clamp(1 - (1 - Bc) / (2*Ae), 0, 1)
//                         : clamp(Bc / (2*(1 - Ae)), 0, 1)
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// Ae is kept strictly inside (0, 1), so neither divisor can be zero.
// Divisions and the reciprocal use the .approx variants. The results are
// copied back into %f57..%f60 and control jumps to $Lt_123_315906 (outside
// this excerpt).
$Lt_123_4098:
	.loc	22	480	0
	ld.param.f32 	%f1005, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1005, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1006, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1006;
	mov.f32 	%f1007, %f190;
	mov.f32 	%f1008, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1009, %f190, %f1008;
	mov.f32 	%f1010, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p86, %f1009, %f1010;
	@!%p86 bra 	$Lt_123_294658;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f1011, 0f00000000;  	// 0
	mov.f32 	%f1012, 0f00000000;  	// 0
	mov.f32 	%f1013, 0f00000000;  	// 0
	mov.f32 	%f1007, 0f00000000;  	// 0
	bra.uni 	$Lt_123_294402;
$Lt_123_294658:
	// Channel pair (%f57, %f61): %f785 = Bc, %f1019 = Ae, blend -> %f1030
	.loc	22	431	0
	mov.f32 	%f1014, 0f00000000;  	// 0
	max.ftz.f32 	%f781, %f61, %f1014;
	mov.f32 	%f1015, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1016, %f57, %f1015;
	mov.f32 	%f1017, 0f3f800000;  	// 1
	min.ftz.f32 	%f785, %f781, %f1017;
	mov.f32 	%f1018, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1019, %f1016, %f1018;
	mov.f32 	%f1020, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p87, %f1019, %f1020;
	@!%p87 bra 	$Lt_123_244226;
	// Ae <= 0.5 branch
	.loc	22	433	0
	mov.f32 	%f1021, 0f3f800000;  	// 1
	mov.f32 	%f1022, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1023, %f1022, %f785;
	add.ftz.f32 	%f1024, %f1019, %f1019;
	div.approx.ftz.f32 	%f1025, %f1023, %f1024;
	sub.ftz.f32 	%f1026, %f1021, %f1025;
	mov.f32 	%f1027, 0f00000000;  	// 0
	max.ftz.f32 	%f1028, %f1026, %f1027;
	mov.f32 	%f1029, 0f3f800000;  	// 1
	min.ftz.f32 	%f1030, %f1028, %f1029;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_46;
$Lt_123_244226:
	// Ae > 0.5 branch
	.loc	22	437	0
	mov.f32 	%f1031, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1032, %f1031, %f1019;
	add.ftz.f32 	%f1033, %f1032, %f1032;
	div.approx.ftz.f32 	%f1034, %f785, %f1033;
	mov.f32 	%f1035, 0f00000000;  	// 0
	max.ftz.f32 	%f1036, %f1034, %f1035;
	mov.f32 	%f1037, 0f3f800000;  	// 1
	min.ftz.f32 	%f1030, %f1036, %f1037;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_46:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA),
	// then the first channel's composite -> %f1013.
	.loc	22	480	0
	mov.f32 	%f1038, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1038, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1039, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1040, %f187, %f200;
	sub.ftz.f32 	%f204, %f1039, %f1040;
	mul.ftz.f32 	%f1041, %f1030, %f64;
	fma.rn.ftz.f32 	%f1042, %f57, %f199, %f1041;
	mul.ftz.f32 	%f1043, %f201, %f1042;
	fma.rn.ftz.f32 	%f1013, %f61, %f204, %f1043;
	// Channel pair (%f58, %f62): %f813 = Bc, %f1049 = Ae, blend -> %f1060
	.loc	22	431	0
	mov.f32 	%f1044, 0f00000000;  	// 0
	max.ftz.f32 	%f809, %f62, %f1044;
	mov.f32 	%f1045, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1046, %f58, %f1045;
	mov.f32 	%f1047, 0f3f800000;  	// 1
	min.ftz.f32 	%f813, %f809, %f1047;
	mov.f32 	%f1048, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1049, %f1046, %f1048;
	mov.f32 	%f1050, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p88, %f1049, %f1050;
	@!%p88 bra 	$Lt_123_244482;
	.loc	22	433	0
	mov.f32 	%f1051, 0f3f800000;  	// 1
	mov.f32 	%f1052, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1053, %f1052, %f813;
	add.ftz.f32 	%f1054, %f1049, %f1049;
	div.approx.ftz.f32 	%f1055, %f1053, %f1054;
	sub.ftz.f32 	%f1056, %f1051, %f1055;
	mov.f32 	%f1057, 0f00000000;  	// 0
	max.ftz.f32 	%f1058, %f1056, %f1057;
	mov.f32 	%f1059, 0f3f800000;  	// 1
	min.ftz.f32 	%f1060, %f1058, %f1059;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_44;
$Lt_123_244482:
	.loc	22	437	0
	mov.f32 	%f1061, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1062, %f1061, %f1049;
	add.ftz.f32 	%f1063, %f1062, %f1062;
	div.approx.ftz.f32 	%f1064, %f813, %f1063;
	mov.f32 	%f1065, 0f00000000;  	// 0
	max.ftz.f32 	%f1066, %f1064, %f1065;
	mov.f32 	%f1067, 0f3f800000;  	// 1
	min.ftz.f32 	%f1060, %f1066, %f1067;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_44:
	// Second channel's composite -> %f1012
	.loc	22	480	0
	mul.ftz.f32 	%f1068, %f1060, %f64;
	fma.rn.ftz.f32 	%f1069, %f58, %f199, %f1068;
	mul.ftz.f32 	%f1070, %f201, %f1069;
	fma.rn.ftz.f32 	%f1012, %f62, %f204, %f1070;
	// Channel pair (%f59, %f63): %f838 = Bc, %f1076 = Ae, blend -> %f1087
	.loc	22	431	0
	mov.f32 	%f1071, 0f00000000;  	// 0
	max.ftz.f32 	%f834, %f63, %f1071;
	mov.f32 	%f1072, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1073, %f59, %f1072;
	mov.f32 	%f1074, 0f3f800000;  	// 1
	min.ftz.f32 	%f838, %f834, %f1074;
	mov.f32 	%f1075, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1076, %f1073, %f1075;
	mov.f32 	%f1077, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p89, %f1076, %f1077;
	@!%p89 bra 	$Lt_123_244738;
	.loc	22	433	0
	mov.f32 	%f1078, 0f3f800000;  	// 1
	mov.f32 	%f1079, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1080, %f1079, %f838;
	add.ftz.f32 	%f1081, %f1076, %f1076;
	div.approx.ftz.f32 	%f1082, %f1080, %f1081;
	sub.ftz.f32 	%f1083, %f1078, %f1082;
	mov.f32 	%f1084, 0f00000000;  	// 0
	max.ftz.f32 	%f1085, %f1083, %f1084;
	mov.f32 	%f1086, 0f3f800000;  	// 1
	min.ftz.f32 	%f1087, %f1085, %f1086;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_42;
$Lt_123_244738:
	.loc	22	437	0
	mov.f32 	%f1088, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1089, %f1088, %f1076;
	add.ftz.f32 	%f1090, %f1089, %f1089;
	div.approx.ftz.f32 	%f1091, %f838, %f1090;
	mov.f32 	%f1092, 0f00000000;  	// 0
	max.ftz.f32 	%f1093, %f1091, %f1092;
	mov.f32 	%f1094, 0f3f800000;  	// 1
	min.ftz.f32 	%f1087, %f1093, %f1094;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_42:
	// Third channel's composite -> %f1011
	.loc	22	480	0
	mul.ftz.f32 	%f1095, %f1087, %f64;
	fma.rn.ftz.f32 	%f1096, %f59, %f199, %f1095;
	mul.ftz.f32 	%f1097, %f201, %f1096;
	fma.rn.ftz.f32 	%f1011, %f63, %f204, %f1097;
$Lt_123_294402:
	// Join point: write the result back into pixel A's registers.
	.loc	6	205	0
	mov.f32 	%f57, %f1013;
	mov.f32 	%f58, %f1012;
	mov.f32 	%f59, %f1011;
	mov.f32 	%f60, %f1007;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_4354: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64) using the blend term 2*A + B - 1.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Ac = clamp(A_c,0,1), Bc = clamp(B_c,0,1):
//     blend = 2*Ac + Bc - 1
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// NOTE(review): unlike the neighbouring sections, no clamp of `blend` to
// [0, 1] is visible here before it is multiplied by %f64 -- confirm this
// matches the source-level intent.
// The results are copied back into %f57..%f60 and control jumps to
// $Lt_123_315906 (outside this excerpt).
$Lt_123_4354:
	.loc	22	481	0
	ld.param.f32 	%f1098, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1098, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1099, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1099;
	mov.f32 	%f1100, %f190;
	mov.f32 	%f1101, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1102, %f190, %f1101;
	mov.f32 	%f1103, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p90, %f1102, %f1103;
	@!%p90 bra 	$Lt_123_295170;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f1104, 0f00000000;  	// 0
	mov.f32 	%f1105, 0f00000000;  	// 0
	mov.f32 	%f1106, 0f00000000;  	// 0
	mov.f32 	%f1100, 0f00000000;  	// 0
	bra.uni 	$Lt_123_294914;
$Lt_123_295170:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA)
	// are interleaved with the first channel: %f787 = Ac, %f1116 = Bc,
	// blend -> %f1119, composite -> %f1106.
	mov.f32 	%f1107, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1107, %f64;
	mov.f32 	%f1108, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f57, %f1108;
	mov.f32 	%f1109, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1109;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1110, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1111, %f187, %f200;
	sub.ftz.f32 	%f204, %f1110, %f1111;
	add.ftz.f32 	%f1112, %f787, %f787;
	mov.f32 	%f1113, 0f00000000;  	// 0
	max.ftz.f32 	%f1114, %f61, %f1113;
	mov.f32 	%f1115, 0f3f800000;  	// 1
	min.ftz.f32 	%f1116, %f1114, %f1115;
	add.ftz.f32 	%f1117, %f1112, %f1116;
	mov.f32 	%f1118, 0fbf800000;  	// -1
	add.ftz.f32 	%f1119, %f1117, %f1118;
	mul.ftz.f32 	%f1120, %f64, %f1119;
	fma.rn.ftz.f32 	%f1121, %f57, %f199, %f1120;
	mul.ftz.f32 	%f1122, %f201, %f1121;
	fma.rn.ftz.f32 	%f1106, %f61, %f204, %f1122;
	// Channel pair (%f58, %f62): %f815 = Ac, %f1129 = Bc -> %f1105
	mov.f32 	%f1123, 0f00000000;  	// 0
	max.ftz.f32 	%f811, %f58, %f1123;
	mov.f32 	%f1124, 0f3f800000;  	// 1
	min.ftz.f32 	%f815, %f811, %f1124;
	add.ftz.f32 	%f1125, %f815, %f815;
	mov.f32 	%f1126, 0f00000000;  	// 0
	max.ftz.f32 	%f1127, %f62, %f1126;
	mov.f32 	%f1128, 0f3f800000;  	// 1
	min.ftz.f32 	%f1129, %f1127, %f1128;
	add.ftz.f32 	%f1130, %f1125, %f1129;
	mov.f32 	%f1131, 0fbf800000;  	// -1
	add.ftz.f32 	%f1132, %f1130, %f1131;
	mul.ftz.f32 	%f1133, %f64, %f1132;
	fma.rn.ftz.f32 	%f1134, %f58, %f199, %f1133;
	mul.ftz.f32 	%f1135, %f201, %f1134;
	fma.rn.ftz.f32 	%f1105, %f62, %f204, %f1135;
	// Channel pair (%f59, %f63): %f840 = Ac, %f1142 = Bc -> %f1104
	mov.f32 	%f1136, 0f00000000;  	// 0
	max.ftz.f32 	%f836, %f59, %f1136;
	mov.f32 	%f1137, 0f3f800000;  	// 1
	min.ftz.f32 	%f840, %f836, %f1137;
	add.ftz.f32 	%f1138, %f840, %f840;
	mov.f32 	%f1139, 0f00000000;  	// 0
	max.ftz.f32 	%f1140, %f63, %f1139;
	mov.f32 	%f1141, 0f3f800000;  	// 1
	min.ftz.f32 	%f1142, %f1140, %f1141;
	add.ftz.f32 	%f1143, %f1138, %f1142;
	mov.f32 	%f1144, 0fbf800000;  	// -1
	add.ftz.f32 	%f1145, %f1143, %f1144;
	mul.ftz.f32 	%f1146, %f64, %f1145;
	fma.rn.ftz.f32 	%f1147, %f59, %f199, %f1146;
	mul.ftz.f32 	%f1148, %f201, %f1147;
	fma.rn.ftz.f32 	%f1104, %f63, %f204, %f1148;
$Lt_123_294914:
	// Join point: write the result back into pixel A's registers.
	.loc	6	206	0
	mov.f32 	%f57, %f1106;
	mov.f32 	%f58, %f1105;
	mov.f32 	%f59, %f1104;
	mov.f32 	%f60, %f1100;
	bra.uni 	$Lt_123_315906;
// Section $Lt_123_4610: composite pixel A (%f57,%f58,%f59; %f60) with
// pixel B (%f61,%f62,%f63; %f64); the blend term is a three-way select.
//   a    = inAlphaGain * %f60                        (%f187)
//   outA = a + %f64 - a * %f64                       (%f190)
//   if outA - 8e-6 <= 0: all four outputs are 0
//   else, per channel with Ac = clamp(A_c,0,1), Bc = clamp(B_c,0,1):
//     if   2*Ac - 1 > Bc: blend = 2*Ac - 1
//     elif 2*Ac     < Bc: blend = 2*Ac
//     else              : blend = Bc
//     out_c = B_c * (1 - a/outA) + (a/outA) * (A_c * (1 - %f64) + %f64 * blend)
// No further clamp is applied to `blend` in this section. The results are
// copied back into %f57..%f60 and control jumps to $Lt_123_315906 (outside
// this excerpt).
$Lt_123_4610:
	.loc	22	482	0
	ld.param.f32 	%f1149, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1149, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1150, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1150;
	mov.f32 	%f1151, %f190;
	mov.f32 	%f1152, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1153, %f190, %f1152;
	mov.f32 	%f1154, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p91, %f1153, %f1154;
	@!%p91 bra 	$Lt_123_295682;
	// outA is (almost) zero: emit zeros.
	mov.f32 	%f1155, 0f00000000;  	// 0
	mov.f32 	%f1156, 0f00000000;  	// 0
	mov.f32 	%f1157, 0f00000000;  	// 0
	mov.f32 	%f1151, 0f00000000;  	// 0
	bra.uni 	$Lt_123_295426;
$Lt_123_295682:
	// Channel pair (%f57, %f61): %f785 = Bc, %f787 = Ac, %f1162 = 2*Ac,
	// %f1164 = 2*Ac - 1, blend -> %f1165
	.loc	22	450	0
	mov.f32 	%f1158, 0f00000000;  	// 0
	max.ftz.f32 	%f781, %f61, %f1158;
	mov.f32 	%f1159, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f57, %f1159;
	mov.f32 	%f1160, 0f3f800000;  	// 1
	min.ftz.f32 	%f785, %f781, %f1160;
	mov.f32 	%f1161, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1161;
	add.ftz.f32 	%f1162, %f787, %f787;
	mov.f32 	%f1163, 0fbf800000;  	// -1
	add.ftz.f32 	%f1164, %f1162, %f1163;
	setp.gt.ftz.f32 	%p92, %f1164, %f785;
	@!%p92 bra 	$Lt_123_245506;
	.loc	22	452	0
	mov.f32 	%f1165, %f1164;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_40;
$Lt_123_245506:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p93, %f1162, %f785;
	@!%p93 bra 	$Lt_123_245762;
	.loc	22	456	0
	mov.f32 	%f1165, %f1162;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_40;
$Lt_123_245762:
	.loc	22	460	0
	mov.f32 	%f1165, %f785;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_40:
	// Shared weights (%f199 = 1 - %f64, %f201 = a/outA, %f204 = 1 - a/outA),
	// then the first channel's composite -> %f1157.
	.loc	22	482	0
	mov.f32 	%f1166, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1166, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1167, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1168, %f187, %f200;
	sub.ftz.f32 	%f204, %f1167, %f1168;
	mul.ftz.f32 	%f1169, %f1165, %f64;
	fma.rn.ftz.f32 	%f1170, %f57, %f199, %f1169;
	mul.ftz.f32 	%f1171, %f201, %f1170;
	fma.rn.ftz.f32 	%f1157, %f61, %f204, %f1171;
	// Channel pair (%f58, %f62): %f813 = Bc, %f815 = Ac, blend -> %f1179
	.loc	22	450	0
	mov.f32 	%f1172, 0f00000000;  	// 0
	max.ftz.f32 	%f809, %f62, %f1172;
	mov.f32 	%f1173, 0f00000000;  	// 0
	max.ftz.f32 	%f811, %f58, %f1173;
	mov.f32 	%f1174, 0f3f800000;  	// 1
	min.ftz.f32 	%f813, %f809, %f1174;
	mov.f32 	%f1175, 0f3f800000;  	// 1
	min.ftz.f32 	%f815, %f811, %f1175;
	add.ftz.f32 	%f1176, %f815, %f815;
	mov.f32 	%f1177, 0fbf800000;  	// -1
	add.ftz.f32 	%f1178, %f1176, %f1177;
	setp.gt.ftz.f32 	%p94, %f1178, %f813;
	@!%p94 bra 	$Lt_123_246018;
	.loc	22	452	0
	mov.f32 	%f1179, %f1178;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_38;
$Lt_123_246018:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p95, %f1176, %f813;
	@!%p95 bra 	$Lt_123_246274;
	.loc	22	456	0
	mov.f32 	%f1179, %f1176;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_38;
$Lt_123_246274:
	.loc	22	460	0
	mov.f32 	%f1179, %f813;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_38:
	// Second channel's composite -> %f1156
	.loc	22	482	0
	mul.ftz.f32 	%f1180, %f1179, %f64;
	fma.rn.ftz.f32 	%f1181, %f58, %f199, %f1180;
	mul.ftz.f32 	%f1182, %f201, %f1181;
	fma.rn.ftz.f32 	%f1156, %f62, %f204, %f1182;
	// Channel pair (%f59, %f63): %f838 = Bc, %f840 = Ac, blend -> %f1190
	.loc	22	450	0
	mov.f32 	%f1183, 0f00000000;  	// 0
	max.ftz.f32 	%f834, %f63, %f1183;
	mov.f32 	%f1184, 0f00000000;  	// 0
	max.ftz.f32 	%f836, %f59, %f1184;
	mov.f32 	%f1185, 0f3f800000;  	// 1
	min.ftz.f32 	%f838, %f834, %f1185;
	mov.f32 	%f1186, 0f3f800000;  	// 1
	min.ftz.f32 	%f840, %f836, %f1186;
	add.ftz.f32 	%f1187, %f840, %f840;
	mov.f32 	%f1188, 0fbf800000;  	// -1
	add.ftz.f32 	%f1189, %f1187, %f1188;
	setp.gt.ftz.f32 	%p96, %f1189, %f838;
	@!%p96 bra 	$Lt_123_246530;
	.loc	22	452	0
	mov.f32 	%f1190, %f1189;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_36;
$Lt_123_246530:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p97, %f1187, %f838;
	@!%p97 bra 	$Lt_123_246786;
	.loc	22	456	0
	mov.f32 	%f1190, %f1187;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__300_36;
$Lt_123_246786:
	.loc	22	460	0
	mov.f32 	%f1190, %f838;
$LDWendi__Z5ClampIfET_S0_S0_S0__300_36:
	// Third channel's composite -> %f1155
	.loc	22	482	0
	mul.ftz.f32 	%f1191, %f1190, %f64;
	fma.rn.ftz.f32 	%f1192, %f59, %f199, %f1191;
	mul.ftz.f32 	%f1193, %f201, %f1192;
	fma.rn.ftz.f32 	%f1155, %f63, %f204, %f1193;
$Lt_123_295426:
	// Join point: write the result back into pixel A's registers.
	.loc	6	207	0
	mov.f32 	%f57, %f1157;
	mov.f32 	%f58, %f1156;
	mov.f32 	%f59, %f1155;
	mov.f32 	%f60, %f1151;
	bra.uni 	$Lt_123_315906;
$Lt_123_4866:
	// One arm of the blend code in this region; like the other arms visible here it
	// ends by jumping to the shared join label $Lt_123_315906.
	// Reads : %f57,%f58,%f59 with %f60, and %f61,%f62,%f63 with %f64.
	// Writes: %f57,%f58,%f59 (blended channels) and %f60 (combined coverage).
	// NOTE(review): presumably the two register groups are colour+alpha of two
	// layers; which one is source/backdrop is not visible here -- confirm.
	.loc	22	483	0
	// a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1194, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1194, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1195, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1195;
	mov.f32 	%f1196, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1197, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1198, %f190, %f1197;
	mov.f32 	%f1199, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p98, %f1198, %f1199;
	@!%p98 bra 	$Lt_123_296194;
	mov.f32 	%f1200, 0f00000000;  	// 0
	mov.f32 	%f1201, 0f00000000;  	// 0
	mov.f32 	%f1202, 0f00000000;  	// 0
	mov.f32 	%f1196, 0f00000000;  	// 0
	bra.uni 	$Lt_123_295938;
$Lt_123_296194:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1203, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1203, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1204, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1205, %f187, %f200;
	sub.ftz.f32 	%f204, %f1204, %f1205;
	// Channel %f57/%f61: blend term B = (%f57 < 1 - %f61) ? 0 : 1, then
	// out = %f61*(1 - a/A) + (a/A)*(%f57*(1 - %f64) + B*%f64).
	// NOTE(review): the 0/1 threshold looks like a "hard mix" style blend -- confirm.
	mov.f32 	%f1206, 0f00000000;  	// 0
	mov.f32 	%f1207, 0f3f800000;  	// 1
	mov.f32 	%f1208, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1209, %f1208, %f61;
	setp.lt.ftz.f32 	%p99, %f57, %f1209;
	selp.f32 	%f1210, %f1206, %f1207, %p99;
	mul.ftz.f32 	%f1211, %f1210, %f64;
	fma.rn.ftz.f32 	%f1212, %f57, %f199, %f1211;
	mul.ftz.f32 	%f1213, %f201, %f1212;
	fma.rn.ftz.f32 	%f1202, %f61, %f204, %f1213;
	// Channel %f58/%f62: same computation.
	mov.f32 	%f1214, 0f00000000;  	// 0
	mov.f32 	%f1215, 0f3f800000;  	// 1
	mov.f32 	%f1216, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1217, %f1216, %f62;
	setp.lt.ftz.f32 	%p100, %f58, %f1217;
	selp.f32 	%f1218, %f1214, %f1215, %p100;
	mul.ftz.f32 	%f1219, %f1218, %f64;
	fma.rn.ftz.f32 	%f1220, %f58, %f199, %f1219;
	mul.ftz.f32 	%f1221, %f201, %f1220;
	fma.rn.ftz.f32 	%f1201, %f62, %f204, %f1221;
	// Channel %f59/%f63: same computation.
	mov.f32 	%f1222, 0f00000000;  	// 0
	mov.f32 	%f1223, 0f3f800000;  	// 1
	mov.f32 	%f1224, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1225, %f1224, %f63;
	setp.lt.ftz.f32 	%p101, %f59, %f1225;
	selp.f32 	%f1226, %f1222, %f1223, %p101;
	mul.ftz.f32 	%f1227, %f1226, %f64;
	fma.rn.ftz.f32 	%f1228, %f59, %f199, %f1227;
	mul.ftz.f32 	%f1229, %f201, %f1228;
	fma.rn.ftz.f32 	%f1200, %f63, %f204, %f1229;
$Lt_123_295938:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	208	0
	mov.f32 	%f57, %f1202;
	mov.f32 	%f58, %f1201;
	mov.f32 	%f59, %f1200;
	mov.f32 	%f60, %f1196;
	bra.uni 	$Lt_123_315906;
$Lt_123_5122:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906.
	// Reads : %f57,%f58,%f59 with %f60, and %f61,%f62,%f63 with %f64.
	// Writes: %f57,%f58,%f59 (blended channels) and %f60 (combined coverage).
	// Per-channel blend term here is the absolute difference of the two inputs.
	// NOTE(review): presumably a "difference" blend mode -- confirm against source.
	.loc	22	484	0
	// a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1230, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1230, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1231, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1231;
	mov.f32 	%f1232, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1233, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1234, %f190, %f1233;
	mov.f32 	%f1235, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p102, %f1234, %f1235;
	@!%p102 bra 	$Lt_123_296706;
	mov.f32 	%f1236, 0f00000000;  	// 0
	mov.f32 	%f1237, 0f00000000;  	// 0
	mov.f32 	%f1238, 0f00000000;  	// 0
	mov.f32 	%f1232, 0f00000000;  	// 0
	bra.uni 	$Lt_123_296450;
$Lt_123_296706:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1239, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1239, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1240, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1241, %f187, %f200;
	sub.ftz.f32 	%f204, %f1240, %f1241;
	// Channel %f57/%f61: B = |%f57 - %f61|;
	// out = %f61*(1 - a/A) + (a/A)*(%f57*(1 - %f64) + %f64*B).
	sub.ftz.f32 	%f1242, %f57, %f61;
	abs.ftz.f32 	%f1243, %f1242;
	mul.ftz.f32 	%f1244, %f64, %f1243;
	fma.rn.ftz.f32 	%f1245, %f57, %f199, %f1244;
	mul.ftz.f32 	%f1246, %f201, %f1245;
	fma.rn.ftz.f32 	%f1238, %f61, %f204, %f1246;
	// Channel %f58/%f62: same computation.
	sub.ftz.f32 	%f1247, %f58, %f62;
	abs.ftz.f32 	%f1248, %f1247;
	mul.ftz.f32 	%f1249, %f64, %f1248;
	fma.rn.ftz.f32 	%f1250, %f58, %f199, %f1249;
	mul.ftz.f32 	%f1251, %f201, %f1250;
	fma.rn.ftz.f32 	%f1237, %f62, %f204, %f1251;
	// Channel %f59/%f63: same computation.
	sub.ftz.f32 	%f1252, %f59, %f63;
	abs.ftz.f32 	%f1253, %f1252;
	mul.ftz.f32 	%f1254, %f64, %f1253;
	fma.rn.ftz.f32 	%f1255, %f59, %f199, %f1254;
	mul.ftz.f32 	%f1256, %f201, %f1255;
	fma.rn.ftz.f32 	%f1236, %f63, %f204, %f1256;
$Lt_123_296450:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	209	0
	mov.f32 	%f57, %f1238;
	mov.f32 	%f58, %f1237;
	mov.f32 	%f59, %f1236;
	mov.f32 	%f60, %f1232;
	bra.uni 	$Lt_123_315906;
$Lt_123_5378:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906.
	// Reads : %f57,%f58,%f59 with %f60, and %f61,%f62,%f63 with %f64.
	// Writes: %f57,%f58,%f59 (blended channels) and %f60 (combined coverage).
	// Per-channel blend term: with p = clamp01(%f57-side) and q = clamp01(%f61-side),
	// B = clamp01(p + q - 2*p*q).
	// NOTE(review): this matches the usual "exclusion" formula -- confirm the mode name.
	.loc	22	485	0
	// a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1257, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1257, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1258, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1258;
	mov.f32 	%f1259, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1260, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1261, %f190, %f1260;
	mov.f32 	%f1262, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p103, %f1261, %f1262;
	@!%p103 bra 	$Lt_123_297218;
	mov.f32 	%f1263, 0f00000000;  	// 0
	mov.f32 	%f1264, 0f00000000;  	// 0
	mov.f32 	%f1265, 0f00000000;  	// 0
	mov.f32 	%f1259, 0f00000000;  	// 0
	bra.uni 	$Lt_123_296962;
$Lt_123_297218:
	// %f199 = 1 - %f64
	mov.f32 	%f1266, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1266, %f64;
	// Channel %f57/%f61: q = %f785 = clamp01(%f61), p = %f787 = clamp01(%f57).
	mov.f32 	%f1267, 0f00000000;  	// 0
	max.ftz.f32 	%f781, %f61, %f1267;
	mov.f32 	%f1268, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f57, %f1268;
	mov.f32 	%f1269, 0f3f800000;  	// 1
	min.ftz.f32 	%f785, %f781, %f1269;
	mov.f32 	%f1270, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1270;
	// Shared factors (interleaved with the first channel by the compiler):
	// %f201 = a/A via approximate reciprocal, %f204 = 1 - a/A.
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1271, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1272, %f187, %f200;
	sub.ftz.f32 	%f204, %f1271, %f1272;
	// B = clamp01(p + q - q*(2p));
	// out = %f61*(1 - a/A) + (a/A)*(%f57*(1 - %f64) + %f64*B).
	add.ftz.f32 	%f1273, %f787, %f785;
	add.ftz.f32 	%f1274, %f787, %f787;
	mul.ftz.f32 	%f1275, %f785, %f1274;
	sub.ftz.f32 	%f1276, %f1273, %f1275;
	mov.f32 	%f1277, 0f00000000;  	// 0
	max.ftz.f32 	%f1278, %f1276, %f1277;
	mov.f32 	%f1279, 0f3f800000;  	// 1
	min.ftz.f32 	%f1280, %f1278, %f1279;
	mul.ftz.f32 	%f1281, %f64, %f1280;
	fma.rn.ftz.f32 	%f1282, %f57, %f199, %f1281;
	mul.ftz.f32 	%f1283, %f201, %f1282;
	fma.rn.ftz.f32 	%f1265, %f61, %f204, %f1283;
	// Channel %f58/%f62: same computation (q = %f813, p = %f815).
	mov.f32 	%f1284, 0f00000000;  	// 0
	max.ftz.f32 	%f809, %f62, %f1284;
	mov.f32 	%f1285, 0f00000000;  	// 0
	max.ftz.f32 	%f811, %f58, %f1285;
	mov.f32 	%f1286, 0f3f800000;  	// 1
	min.ftz.f32 	%f813, %f809, %f1286;
	mov.f32 	%f1287, 0f3f800000;  	// 1
	min.ftz.f32 	%f815, %f811, %f1287;
	add.ftz.f32 	%f1288, %f815, %f813;
	add.ftz.f32 	%f1289, %f815, %f815;
	mul.ftz.f32 	%f1290, %f813, %f1289;
	sub.ftz.f32 	%f1291, %f1288, %f1290;
	mov.f32 	%f1292, 0f00000000;  	// 0
	max.ftz.f32 	%f1293, %f1291, %f1292;
	mov.f32 	%f1294, 0f3f800000;  	// 1
	min.ftz.f32 	%f1295, %f1293, %f1294;
	mul.ftz.f32 	%f1296, %f64, %f1295;
	fma.rn.ftz.f32 	%f1297, %f58, %f199, %f1296;
	mul.ftz.f32 	%f1298, %f201, %f1297;
	fma.rn.ftz.f32 	%f1264, %f62, %f204, %f1298;
	// Channel %f59/%f63: same computation (q = %f838, p = %f840).
	mov.f32 	%f1299, 0f00000000;  	// 0
	max.ftz.f32 	%f834, %f63, %f1299;
	mov.f32 	%f1300, 0f00000000;  	// 0
	max.ftz.f32 	%f836, %f59, %f1300;
	mov.f32 	%f1301, 0f3f800000;  	// 1
	min.ftz.f32 	%f838, %f834, %f1301;
	mov.f32 	%f1302, 0f3f800000;  	// 1
	min.ftz.f32 	%f840, %f836, %f1302;
	add.ftz.f32 	%f1303, %f840, %f838;
	add.ftz.f32 	%f1304, %f840, %f840;
	mul.ftz.f32 	%f1305, %f838, %f1304;
	sub.ftz.f32 	%f1306, %f1303, %f1305;
	mov.f32 	%f1307, 0f00000000;  	// 0
	max.ftz.f32 	%f1308, %f1306, %f1307;
	mov.f32 	%f1309, 0f3f800000;  	// 1
	min.ftz.f32 	%f1310, %f1308, %f1309;
	mul.ftz.f32 	%f1311, %f64, %f1310;
	fma.rn.ftz.f32 	%f1312, %f59, %f199, %f1311;
	mul.ftz.f32 	%f1313, %f201, %f1312;
	fma.rn.ftz.f32 	%f1263, %f63, %f204, %f1313;
$Lt_123_296962:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	210	0
	mov.f32 	%f57, %f1265;
	mov.f32 	%f58, %f1264;
	mov.f32 	%f59, %f1263;
	mov.f32 	%f60, %f1259;
	bra.uni 	$Lt_123_315906;
$Lt_123_5634:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906.
	// Reads : %f57,%f58,%f59 with %f60, and %f61,%f62,%f63 with %f64.
	// Writes: %f57,%f58,%f59 (blended channels) and %f60 (combined coverage).
	// Per-channel blend term: B = clamp01(clamp01(%f61-side) - clamp01(%f57-side)).
	// NOTE(review): presumably a "subtract" blend mode -- confirm against source.
	.loc	22	486	0
	// a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1314, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1314, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1315, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1315;
	mov.f32 	%f1316, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1317, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1318, %f190, %f1317;
	mov.f32 	%f1319, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p104, %f1318, %f1319;
	@!%p104 bra 	$Lt_123_297730;
	mov.f32 	%f1320, 0f00000000;  	// 0
	mov.f32 	%f1321, 0f00000000;  	// 0
	mov.f32 	%f1322, 0f00000000;  	// 0
	mov.f32 	%f1316, 0f00000000;  	// 0
	bra.uni 	$Lt_123_297474;
$Lt_123_297730:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1323, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1323, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1324, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1325, %f187, %f200;
	sub.ftz.f32 	%f204, %f1324, %f1325;
	// Channel %f57/%f61: B = clamp01(clamp01(%f61) - clamp01(%f57));
	// out = %f61*(1 - a/A) + (a/A)*(%f57*(1 - %f64) + %f64*B).
	mov.f32 	%f1326, 0f00000000;  	// 0
	max.ftz.f32 	%f1327, %f61, %f1326;
	mov.f32 	%f1328, 0f3f800000;  	// 1
	min.ftz.f32 	%f1329, %f1327, %f1328;
	mov.f32 	%f1330, 0f00000000;  	// 0
	max.ftz.f32 	%f1331, %f57, %f1330;
	mov.f32 	%f1332, 0f3f800000;  	// 1
	min.ftz.f32 	%f1333, %f1331, %f1332;
	sub.ftz.f32 	%f1334, %f1329, %f1333;
	mov.f32 	%f1335, 0f00000000;  	// 0
	max.ftz.f32 	%f1336, %f1334, %f1335;
	mov.f32 	%f1337, 0f3f800000;  	// 1
	min.ftz.f32 	%f1338, %f1336, %f1337;
	mul.ftz.f32 	%f1339, %f64, %f1338;
	fma.rn.ftz.f32 	%f1340, %f57, %f199, %f1339;
	mul.ftz.f32 	%f1341, %f201, %f1340;
	fma.rn.ftz.f32 	%f1322, %f61, %f204, %f1341;
	// Channel %f58/%f62: same computation.
	mov.f32 	%f1342, 0f00000000;  	// 0
	max.ftz.f32 	%f1343, %f62, %f1342;
	mov.f32 	%f1344, 0f3f800000;  	// 1
	min.ftz.f32 	%f1345, %f1343, %f1344;
	mov.f32 	%f1346, 0f00000000;  	// 0
	max.ftz.f32 	%f1347, %f58, %f1346;
	mov.f32 	%f1348, 0f3f800000;  	// 1
	min.ftz.f32 	%f1349, %f1347, %f1348;
	sub.ftz.f32 	%f1350, %f1345, %f1349;
	mov.f32 	%f1351, 0f00000000;  	// 0
	max.ftz.f32 	%f1352, %f1350, %f1351;
	mov.f32 	%f1353, 0f3f800000;  	// 1
	min.ftz.f32 	%f1354, %f1352, %f1353;
	mul.ftz.f32 	%f1355, %f64, %f1354;
	fma.rn.ftz.f32 	%f1356, %f58, %f199, %f1355;
	mul.ftz.f32 	%f1357, %f201, %f1356;
	fma.rn.ftz.f32 	%f1321, %f62, %f204, %f1357;
	// Channel %f59/%f63: same computation.
	mov.f32 	%f1358, 0f00000000;  	// 0
	max.ftz.f32 	%f1359, %f63, %f1358;
	mov.f32 	%f1360, 0f3f800000;  	// 1
	min.ftz.f32 	%f1361, %f1359, %f1360;
	mov.f32 	%f1362, 0f00000000;  	// 0
	max.ftz.f32 	%f1363, %f59, %f1362;
	mov.f32 	%f1364, 0f3f800000;  	// 1
	min.ftz.f32 	%f1365, %f1363, %f1364;
	sub.ftz.f32 	%f1366, %f1361, %f1365;
	mov.f32 	%f1367, 0f00000000;  	// 0
	max.ftz.f32 	%f1368, %f1366, %f1367;
	mov.f32 	%f1369, 0f3f800000;  	// 1
	min.ftz.f32 	%f1370, %f1368, %f1369;
	mul.ftz.f32 	%f1371, %f64, %f1370;
	fma.rn.ftz.f32 	%f1372, %f59, %f199, %f1371;
	mul.ftz.f32 	%f1373, %f201, %f1372;
	fma.rn.ftz.f32 	%f1320, %f63, %f204, %f1373;
$Lt_123_297474:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	211	0
	mov.f32 	%f57, %f1322;
	mov.f32 	%f58, %f1321;
	mov.f32 	%f59, %f1320;
	mov.f32 	%f60, %f1316;
	bra.uni 	$Lt_123_315906;
$Lt_123_5890:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906.
	// Reads : %f57,%f58,%f59 with %f60, and %f61,%f62,%f63 with %f64.
	// Writes: %f57,%f58,%f59 (blended channels) and %f60 (combined coverage).
	// Per-channel blend term:
	//   B = clamp01( clamp01(%f61-side) / min(max(%f57-side, 1e-7), 1) )
	// The 1e-7 lower bound keeps the approximate divide away from a zero divisor.
	// NOTE(review): presumably a "divide" blend mode -- confirm against source.
	.loc	22	487	0
	// a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1374, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1374, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1375, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1375;
	mov.f32 	%f1376, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1377, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1378, %f190, %f1377;
	mov.f32 	%f1379, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p105, %f1378, %f1379;
	@!%p105 bra 	$Lt_123_298242;
	mov.f32 	%f1380, 0f00000000;  	// 0
	mov.f32 	%f1381, 0f00000000;  	// 0
	mov.f32 	%f1382, 0f00000000;  	// 0
	mov.f32 	%f1376, 0f00000000;  	// 0
	bra.uni 	$Lt_123_297986;
$Lt_123_298242:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1383, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1383, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1384, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1385, %f187, %f200;
	sub.ftz.f32 	%f204, %f1384, %f1385;
	// Channel %f57/%f61: numerator clamp01(%f61), divisor clamp(%f57, 1e-7, 1);
	// out = %f61*(1 - a/A) + (a/A)*(%f57*(1 - %f64) + %f64*B).
	mov.f32 	%f1386, 0f00000000;  	// 0
	max.ftz.f32 	%f1387, %f61, %f1386;
	mov.f32 	%f1388, 0f3f800000;  	// 1
	min.ftz.f32 	%f1389, %f1387, %f1388;
	mov.f32 	%f1390, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1391, %f57, %f1390;
	mov.f32 	%f1392, 0f3f800000;  	// 1
	min.ftz.f32 	%f1393, %f1391, %f1392;
	div.approx.ftz.f32 	%f1394, %f1389, %f1393;
	mov.f32 	%f1395, 0f00000000;  	// 0
	max.ftz.f32 	%f1396, %f1394, %f1395;
	mov.f32 	%f1397, 0f3f800000;  	// 1
	min.ftz.f32 	%f1398, %f1396, %f1397;
	mul.ftz.f32 	%f1399, %f64, %f1398;
	fma.rn.ftz.f32 	%f1400, %f57, %f199, %f1399;
	mul.ftz.f32 	%f1401, %f201, %f1400;
	fma.rn.ftz.f32 	%f1382, %f61, %f204, %f1401;
	// Channel %f58/%f62: same computation.
	mov.f32 	%f1402, 0f00000000;  	// 0
	max.ftz.f32 	%f1403, %f62, %f1402;
	mov.f32 	%f1404, 0f3f800000;  	// 1
	min.ftz.f32 	%f1405, %f1403, %f1404;
	mov.f32 	%f1406, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1407, %f58, %f1406;
	mov.f32 	%f1408, 0f3f800000;  	// 1
	min.ftz.f32 	%f1409, %f1407, %f1408;
	div.approx.ftz.f32 	%f1410, %f1405, %f1409;
	mov.f32 	%f1411, 0f00000000;  	// 0
	max.ftz.f32 	%f1412, %f1410, %f1411;
	mov.f32 	%f1413, 0f3f800000;  	// 1
	min.ftz.f32 	%f1414, %f1412, %f1413;
	mul.ftz.f32 	%f1415, %f64, %f1414;
	fma.rn.ftz.f32 	%f1416, %f58, %f199, %f1415;
	mul.ftz.f32 	%f1417, %f201, %f1416;
	fma.rn.ftz.f32 	%f1381, %f62, %f204, %f1417;
	// Channel %f59/%f63: same computation.
	mov.f32 	%f1418, 0f00000000;  	// 0
	max.ftz.f32 	%f1419, %f63, %f1418;
	mov.f32 	%f1420, 0f3f800000;  	// 1
	min.ftz.f32 	%f1421, %f1419, %f1420;
	mov.f32 	%f1422, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1423, %f59, %f1422;
	mov.f32 	%f1424, 0f3f800000;  	// 1
	min.ftz.f32 	%f1425, %f1423, %f1424;
	div.approx.ftz.f32 	%f1426, %f1421, %f1425;
	mov.f32 	%f1427, 0f00000000;  	// 0
	max.ftz.f32 	%f1428, %f1426, %f1427;
	mov.f32 	%f1429, 0f3f800000;  	// 1
	min.ftz.f32 	%f1430, %f1428, %f1429;
	mul.ftz.f32 	%f1431, %f64, %f1430;
	fma.rn.ftz.f32 	%f1432, %f59, %f199, %f1431;
	mul.ftz.f32 	%f1433, %f201, %f1432;
	fma.rn.ftz.f32 	%f1380, %f63, %f204, %f1433;
$Lt_123_297986:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	212	0
	mov.f32 	%f57, %f1382;
	mov.f32 	%f58, %f1381;
	mov.f32 	%f59, %f1380;
	mov.f32 	%f60, %f1376;
	bra.uni 	$Lt_123_315906;
$Lt_123_6146:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906. Unlike the per-channel arms, this one treats the three
	// channels together. Write C = (%f57,%f58,%f59) and D = (%f61,%f62,%f63).
	// Stage 1: rescale C so that its minimum channel becomes 0, its maximum channel
	//          becomes S = clamp01(max3(D) - min3(D)) and its middle channel is
	//          interpolated; result T = (%f1447, %f1448, %f1452).
	// Stage 2: shift T so its weighted sum matches that of D (weights read from the
	//          constant table kRGB32f_To_601YPbPr), then pull out-of-range channels
	//          back towards the weighted sum.
	// Stage 3: composite the result over D with the same coverage math as the
	//          other arms and write %f57..%f60.
	// NOTE(review): this has the shape of the usual SetSat / SetLum / ClipColor
	// non-separable blend ("hue"-style if C is the source) -- confirm the mode name.
	.loc	22	154	0
	// %f1436 = max3(C), %f1437 = min3(C); %p106 = (%f57 < %f58) is reused further down.
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	// Dispatch on which channel holds the minimum / maximum of C.
	setp.eq.ftz.f32 	%p108, %f1437, %f59;
	@!%p108 bra 	$Lt_123_298754;
	setp.eq.ftz.f32 	%p109, %f1436, %f58;
	@!%p109 bra 	$Lt_123_299266;
	// min = %f59, max = %f58. The divisor %f58-%f59 is only used when %f58 > %f59.
	setp.gt.ftz.f32 	%p110, %f58, %f59;
	@!%p110 bra 	$Lt_123_299778;
	.loc	22	161	0
	// S = %f1443 = clamp01(max3(D) - min3(D)) (cvt.sat clamps to [0,1]).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f57 slot) = S*(%f57-%f59)/(%f58-%f59); max (%f58 slot) = S.
	sub.ftz.f32 	%f1444, %f57, %f59;
	mul.ftz.f32 	%f1445, %f1443, %f1444;
	sub.ftz.f32 	%f1446, %f58, %f59;
	div.approx.ftz.f32 	%f1447, %f1445, %f1446;
	.loc	22	162	0
	mov.f32 	%f1448, %f1443;
	bra.uni 	$Lt_123_300034;
$Lt_123_299778:
	// max == min: both non-minimum slots become 0.
	.loc	22	166	0
	mov.f32 	%f1447, 0f00000000;  	// 0
	mov.f32 	%f1448, 0f00000000;  	// 0
	bra.uni 	$Lt_123_300034;
$Lt_123_299266:
	// min = %f59 and max is not %f58. The divisor %f57-%f59 is only used when %f57 > %f59.
	setp.gt.ftz.f32 	%p113, %f57, %f59;
	@!%p113 bra 	$Lt_123_300290;
	.loc	22	173	0
	// S = %f1443 = clamp01(max3(D) - min3(D)).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f58 slot) = S*(%f58-%f59)/(%f57-%f59); max (%f57 slot) = S.
	sub.ftz.f32 	%f1449, %f58, %f59;
	mul.ftz.f32 	%f1450, %f1443, %f1449;
	sub.ftz.f32 	%f1451, %f57, %f59;
	div.approx.ftz.f32 	%f1448, %f1450, %f1451;
	.loc	22	174	0
	mov.f32 	%f1447, %f1443;
	bra.uni 	$Lt_123_300034;
$Lt_123_300290:
	.loc	22	178	0
	mov.f32 	%f1447, 0f00000000;  	// 0
	mov.f32 	%f1448, 0f00000000;  	// 0
$Lt_123_300034:
$Lt_123_299010:
	// Minimum slot (%f59) becomes 0.
	mov.f32 	%f1452, 0f00000000;  	// 0
	bra.uni 	$Lt_123_302594;
$Lt_123_298754:
	// Minimum is not %f59: test whether min is %f58 (%p114) and whether max is %f59 (%p115).
	setp.eq.ftz.f32 	%p114, %f1437, %f58;
	setp.eq.ftz.f32 	%p115, %f1436, %f59;
	@!%p115 bra 	$Lt_123_300802;
	@!%p114 bra 	$Lt_123_301314;
	// max = %f59, min = %f58. The divisor %f59-%f58 is only used when %f58 < %f59.
	setp.lt.ftz.f32 	%p116, %f58, %f59;
	@!%p116 bra 	$Lt_123_301826;
	.loc	22	191	0
	// S = %f1443 = clamp01(max3(D) - min3(D)).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f57 slot) = S*(%f57-%f58)/(%f59-%f58); max (%f59 slot) = S.
	sub.ftz.f32 	%f1453, %f57, %f58;
	mul.ftz.f32 	%f1454, %f1443, %f1453;
	sub.ftz.f32 	%f1455, %f59, %f58;
	div.approx.ftz.f32 	%f1447, %f1454, %f1455;
	.loc	22	192	0
	mov.f32 	%f1452, %f1443;
	bra.uni 	$Lt_123_301570;
$Lt_123_301826:
	.loc	22	196	0
	mov.f32 	%f1447, 0f00000000;  	// 0
	mov.f32 	%f1452, 0f00000000;  	// 0
$Lt_123_301570:
	// Minimum slot (%f58) becomes 0.
	mov.f32 	%f1448, 0f00000000;  	// 0
	bra.uni 	$Lt_123_302594;
$Lt_123_301314:
	// max = %f59, min is neither %f59 nor %f58. The divisor %f59-%f57 is only used
	// when %f57 < %f59.
	setp.lt.ftz.f32 	%p117, %f57, %f59;
	@!%p117 bra 	$Lt_123_302338;
	.loc	22	204	0
	// S = %f1443 = clamp01(max3(D) - min3(D)).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f58 slot) = S*(%f58-%f57)/(%f59-%f57); max (%f59 slot) = S.
	sub.ftz.f32 	%f1456, %f58, %f57;
	mul.ftz.f32 	%f1457, %f1443, %f1456;
	sub.ftz.f32 	%f1458, %f59, %f57;
	div.approx.ftz.f32 	%f1448, %f1457, %f1458;
	.loc	22	205	0
	mov.f32 	%f1452, %f1443;
	bra.uni 	$Lt_123_302082;
$Lt_123_302338:
	.loc	22	209	0
	mov.f32 	%f1452, 0f00000000;  	// 0
	mov.f32 	%f1448, 0f00000000;  	// 0
$Lt_123_302082:
	.loc	22	211	0
	// Minimum slot (%f57) becomes 0.
	mov.f32 	%f1447, 0f00000000;  	// 0
	bra.uni 	$Lt_123_302594;
$Lt_123_300802:
	// Neither min nor max is %f59, so %f59 is the middle channel.
	@!%p114 bra 	$Lt_123_302850;
	// min = %f58. The divisor %f57-%f58 is only used when %f57 > %f58.
	setp.gt.ftz.f32 	%p118, %f57, %f58;
	@!%p118 bra 	$Lt_123_303362;
	.loc	22	220	0
	// S = %f1443 = clamp01(max3(D) - min3(D)).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f59 slot) = S*(%f59-%f58)/(%f57-%f58); max (%f57 slot) = S.
	sub.ftz.f32 	%f1459, %f59, %f58;
	mul.ftz.f32 	%f1460, %f1443, %f1459;
	sub.ftz.f32 	%f1461, %f57, %f58;
	div.approx.ftz.f32 	%f1452, %f1460, %f1461;
	.loc	22	221	0
	mov.f32 	%f1447, %f1443;
	bra.uni 	$Lt_123_303106;
$Lt_123_303362:
	.loc	22	225	0
	mov.f32 	%f1447, 0f00000000;  	// 0
	mov.f32 	%f1452, 0f00000000;  	// 0
$Lt_123_303106:
	// Minimum slot (%f58) becomes 0.
	mov.f32 	%f1448, 0f00000000;  	// 0
	bra.uni 	$Lt_123_302594;
$Lt_123_302850:
	// Remaining ordering: %f57 treated as min, %f58 as max; the divisor %f58-%f57
	// is only used when %f57 < %f58 (%p106 from the top of this arm).
	@!%p106 bra 	$Lt_123_303874;
	.loc	22	233	0
	// S = %f1443 = clamp01(max3(D) - min3(D)).
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	sub.ftz.f32 	%f1442, %f1439, %f1441;
	cvt.ftz.sat.f32.f32 	%f1443, %f1442;
	// middle (%f59 slot) = S*(%f59-%f57)/(%f58-%f57); max (%f58 slot) = S.
	sub.ftz.f32 	%f1462, %f59, %f57;
	mul.ftz.f32 	%f1463, %f1443, %f1462;
	sub.ftz.f32 	%f1464, %f58, %f57;
	div.approx.ftz.f32 	%f1452, %f1463, %f1464;
	.loc	22	234	0
	mov.f32 	%f1448, %f1443;
	bra.uni 	$Lt_123_303618;
$Lt_123_303874:
	.loc	22	238	0
	mov.f32 	%f1452, 0f00000000;  	// 0
	mov.f32 	%f1448, 0f00000000;  	// 0
$Lt_123_303618:
	.loc	22	240	0
	// Minimum slot (%f57) becomes 0.
	mov.f32 	%f1447, 0f00000000;  	// 0
$Lt_123_302594:
$Lt_123_300546:
$Lt_123_298498:
	.loc	22	113	0
	// Stage 2. Weighted sums with w0 = table[+0] (%f522), w1 = table[+4] (%f523),
	// w2 = table[+8] (%f521):
	//   %f1469 = w2*%f61 + w0*%f63 + w1*%f62          (for D)
	//   %f1470 = w2*%f1447 + w0*%f1452 + w1*%f1448     (for T)
	// NOTE(review): table name suggests Rec.601 luma weights -- confirm.
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1465, %f62, %f523;
	mul.ftz.f32 	%f1466, %f1448, %f523;
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1467, %f522, %f63, %f1465;
	fma.rn.ftz.f32 	%f1468, %f522, %f1452, %f1466;
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1469, %f521, %f61, %f1467;
	fma.rn.ftz.f32 	%f1470, %f521, %f1447, %f1468;
	// d = clamp01(sum(D)) - clamp01(sum(T)); add d to every channel of T.
	// %f1475/%f1477/%f1479 are the working copies that the clip steps may overwrite.
	cvt.ftz.sat.f32.f32 	%f1471, %f1469;
	cvt.ftz.sat.f32.f32 	%f1472, %f1470;
	sub.ftz.f32 	%f1473, %f1471, %f1472;
	add.ftz.f32 	%f1474, %f1473, %f1447;
	mov.f32 	%f1475, %f1474;
	add.ftz.f32 	%f1476, %f1473, %f1448;
	mov.f32 	%f1477, %f1476;
	add.ftz.f32 	%f1478, %f1473, %f1452;
	mov.f32 	%f1479, %f1478;
	.loc	22	50	0
	// L = %f1483 = clamp01(weighted sum of the shifted channels).
	mul.ftz.f32 	%f1480, %f1476, %f523;
	fma.rn.ftz.f32 	%f1481, %f522, %f1478, %f1480;
	fma.rn.ftz.f32 	%f1482, %f521, %f1474, %f1481;
	cvt.ftz.sat.f32.f32 	%f1483, %f1482;
	.loc	22	116	0
	// n = %f1485 = minimum of the three shifted channels.
	setp.gt.ftz.f32 	%p119, %f1476, %f1474;
	selp.f32 	%f1484, %f1474, %f1476, %p119;
	setp.lt.ftz.f32 	%p120, %f1484, %f1478;
	selp.f32 	%f1485, %f1484, %f1478, %p120;
	mov.f32 	%f1486, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p121, %f1485, %f1486;
	@!%p121 bra 	$Lt_123_304130;
	// n < 0: each channel c becomes L + L*(c - L)/(L - n).
	.loc	22	119	0
	sub.ftz.f32 	%f1487, %f1483, %f1485;
	sub.ftz.f32 	%f1488, %f1478, %f1483;
	mul.ftz.f32 	%f1489, %f1483, %f1488;
	div.approx.ftz.f32 	%f1490, %f1489, %f1487;
	add.ftz.f32 	%f1479, %f1483, %f1490;
	.loc	22	120	0
	sub.ftz.f32 	%f1491, %f1476, %f1483;
	mul.ftz.f32 	%f1492, %f1483, %f1491;
	div.approx.ftz.f32 	%f1493, %f1492, %f1487;
	add.ftz.f32 	%f1477, %f1483, %f1493;
	.loc	22	121	0
	sub.ftz.f32 	%f1494, %f1474, %f1483;
	mul.ftz.f32 	%f1495, %f1483, %f1494;
	div.approx.ftz.f32 	%f1496, %f1495, %f1487;
	add.ftz.f32 	%f1475, %f1483, %f1496;
$Lt_123_304130:
	// x = %f1498 = maximum of the three shifted channels, taken from the values
	// before the n < 0 adjustment above.
	max.ftz.f32 	%f1497, %f1476, %f1474;
	max.ftz.f32 	%f1498, %f1497, %f1478;
	mov.f32 	%f1499, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p122, %f1498, %f1499;
	@!%p122 bra 	$Lt_123_304642;
	// x > 1: each (possibly already adjusted) channel c becomes
	// L + (1 - L)*(c - L)/(x - L).
	.loc	27	529	0
	mov.f32 	%f1500, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1501, %f1500, %f1483;
	sub.ftz.f32 	%f1502, %f1498, %f1483;
	sub.ftz.f32 	%f1503, %f1479, %f1483;
	mul.ftz.f32 	%f1504, %f1501, %f1503;
	div.approx.ftz.f32 	%f1505, %f1504, %f1502;
	.loc	22	125	0
	add.ftz.f32 	%f1479, %f1505, %f1483;
	.loc	27	529	0
	sub.ftz.f32 	%f1506, %f1477, %f1483;
	mul.ftz.f32 	%f1507, %f1501, %f1506;
	div.approx.ftz.f32 	%f1508, %f1507, %f1502;
	.loc	22	126	0
	add.ftz.f32 	%f1477, %f1508, %f1483;
	.loc	27	529	0
	sub.ftz.f32 	%f1509, %f1475, %f1483;
	mul.ftz.f32 	%f1510, %f1501, %f1509;
	div.approx.ftz.f32 	%f1511, %f1510, %f1502;
	.loc	22	127	0
	add.ftz.f32 	%f1475, %f1511, %f1483;
$Lt_123_304642:
	.loc	22	468	0
	// Stage 3. a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1512, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1512, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1513, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1513;
	mov.f32 	%f1514, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1515, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1516, %f190, %f1515;
	mov.f32 	%f1517, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p123, %f1516, %f1517;
	@!%p123 bra 	$Lt_123_305410;
	mov.f32 	%f1518, 0f00000000;  	// 0
	mov.f32 	%f1519, 0f00000000;  	// 0
	mov.f32 	%f1520, 0f00000000;  	// 0
	mov.f32 	%f1514, 0f00000000;  	// 0
	bra.uni 	$Lt_123_305154;
$Lt_123_305410:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1521, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1521, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1522, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1523, %f187, %f200;
	sub.ftz.f32 	%f204, %f1522, %f1523;
	// With R the stage-2 result for a channel, the blended colour is used for both
	// terms: out = D*(1 - a/A) + (a/A)*(R*(1 - %f64) + R*%f64).
	mul.ftz.f32 	%f1524, %f199, %f1475;
	fma.rn.ftz.f32 	%f1525, %f1475, %f64, %f1524;
	mul.ftz.f32 	%f1526, %f201, %f1525;
	fma.rn.ftz.f32 	%f1520, %f61, %f204, %f1526;
	mul.ftz.f32 	%f1527, %f199, %f1477;
	fma.rn.ftz.f32 	%f1528, %f1477, %f64, %f1527;
	mul.ftz.f32 	%f1529, %f201, %f1528;
	fma.rn.ftz.f32 	%f1519, %f62, %f204, %f1529;
	mul.ftz.f32 	%f1530, %f199, %f1479;
	fma.rn.ftz.f32 	%f1531, %f1479, %f64, %f1530;
	mul.ftz.f32 	%f1532, %f201, %f1531;
	fma.rn.ftz.f32 	%f1518, %f63, %f204, %f1532;
$Lt_123_305154:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	213	0
	mov.f32 	%f57, %f1520;
	mov.f32 	%f58, %f1519;
	mov.f32 	%f59, %f1518;
	mov.f32 	%f60, %f1514;
	bra.uni 	$Lt_123_315906;
$Lt_123_6402:
	// One arm of the blend code in this region; ends at the shared join label
	// $Lt_123_315906. Same three-stage structure as the arm at $Lt_123_6146 but
	// with the roles of the two triples swapped in stage 1.
	// Write C = (%f57,%f58,%f59) and D = (%f61,%f62,%f63).
	// Stage 1: rescale D so that its minimum channel becomes 0, its maximum channel
	//          becomes S = clamp01(max3(C) - min3(C)) and its middle channel is
	//          interpolated; result T = (%f1538, %f1539, %f1543).
	// Stage 2: shift T so its weighted sum matches that of D (weights read from the
	//          constant table kRGB32f_To_601YPbPr), then pull out-of-range channels
	//          back towards the weighted sum.
	// Stage 3: composite the result over D with the same coverage math as the
	//          other arms and write %f57..%f60.
	// NOTE(review): this has the shape of the usual SetSat / SetLum / ClipColor
	// non-separable blend ("saturation"-style if C is the source) -- confirm.
	.loc	22	154	0
	// %f1439 = max3(D), %f1441 = min3(D); %p111 = (%f61 < %f62) is reused further down.
	max.ftz.f32 	%f1438, %f61, %f62;
	setp.lt.ftz.f32 	%p111, %f61, %f62;
	max.ftz.f32 	%f1439, %f1438, %f63;
	selp.f32 	%f1440, %f61, %f62, %p111;
	setp.lt.ftz.f32 	%p112, %f1440, %f63;
	selp.f32 	%f1441, %f1440, %f63, %p112;
	// Dispatch on which channel holds the minimum / maximum of D.
	setp.eq.ftz.f32 	%p124, %f1441, %f63;
	@!%p124 bra 	$Lt_123_305922;
	setp.eq.ftz.f32 	%p125, %f1439, %f62;
	@!%p125 bra 	$Lt_123_306434;
	// min = %f63, max = %f62. The divisor %f62-%f63 is only used when %f62 > %f63.
	setp.gt.ftz.f32 	%p126, %f62, %f63;
	@!%p126 bra 	$Lt_123_306946;
	.loc	22	161	0
	// S = %f1534 = clamp01(max3(C) - min3(C)) (cvt.sat clamps to [0,1]).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f61 slot) = S*(%f61-%f63)/(%f62-%f63); max (%f62 slot) = S.
	sub.ftz.f32 	%f1535, %f61, %f63;
	mul.ftz.f32 	%f1536, %f1534, %f1535;
	sub.ftz.f32 	%f1537, %f62, %f63;
	div.approx.ftz.f32 	%f1538, %f1536, %f1537;
	.loc	22	162	0
	mov.f32 	%f1539, %f1534;
	bra.uni 	$Lt_123_307202;
$Lt_123_306946:
	// max == min: both non-minimum slots become 0.
	.loc	22	166	0
	mov.f32 	%f1538, 0f00000000;  	// 0
	mov.f32 	%f1539, 0f00000000;  	// 0
	bra.uni 	$Lt_123_307202;
$Lt_123_306434:
	// min = %f63 and max is not %f62. The divisor %f61-%f63 is only used when %f61 > %f63.
	setp.gt.ftz.f32 	%p127, %f61, %f63;
	@!%p127 bra 	$Lt_123_307458;
	.loc	22	173	0
	// S = %f1534 = clamp01(max3(C) - min3(C)).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f62 slot) = S*(%f62-%f63)/(%f61-%f63); max (%f61 slot) = S.
	sub.ftz.f32 	%f1540, %f62, %f63;
	mul.ftz.f32 	%f1541, %f1534, %f1540;
	sub.ftz.f32 	%f1542, %f61, %f63;
	div.approx.ftz.f32 	%f1539, %f1541, %f1542;
	.loc	22	174	0
	mov.f32 	%f1538, %f1534;
	bra.uni 	$Lt_123_307202;
$Lt_123_307458:
	.loc	22	178	0
	mov.f32 	%f1538, 0f00000000;  	// 0
	mov.f32 	%f1539, 0f00000000;  	// 0
$Lt_123_307202:
$Lt_123_306178:
	// Minimum slot (%f63) becomes 0.
	mov.f32 	%f1543, 0f00000000;  	// 0
	bra.uni 	$Lt_123_309762;
$Lt_123_305922:
	// Minimum is not %f63: test whether min is %f62 (%p128) and whether max is %f63 (%p129).
	setp.eq.ftz.f32 	%p128, %f1441, %f62;
	setp.eq.ftz.f32 	%p129, %f1439, %f63;
	@!%p129 bra 	$Lt_123_307970;
	@!%p128 bra 	$Lt_123_308482;
	// max = %f63, min = %f62. The divisor %f63-%f62 is only used when %f62 < %f63.
	setp.lt.ftz.f32 	%p130, %f62, %f63;
	@!%p130 bra 	$Lt_123_308994;
	.loc	22	191	0
	// S = %f1534 = clamp01(max3(C) - min3(C)).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f61 slot) = S*(%f61-%f62)/(%f63-%f62); max (%f63 slot) = S.
	sub.ftz.f32 	%f1544, %f61, %f62;
	mul.ftz.f32 	%f1545, %f1534, %f1544;
	sub.ftz.f32 	%f1546, %f63, %f62;
	div.approx.ftz.f32 	%f1538, %f1545, %f1546;
	.loc	22	192	0
	mov.f32 	%f1543, %f1534;
	bra.uni 	$Lt_123_308738;
$Lt_123_308994:
	.loc	22	196	0
	mov.f32 	%f1538, 0f00000000;  	// 0
	mov.f32 	%f1543, 0f00000000;  	// 0
$Lt_123_308738:
	// Minimum slot (%f62) becomes 0.
	mov.f32 	%f1539, 0f00000000;  	// 0
	bra.uni 	$Lt_123_309762;
$Lt_123_308482:
	// max = %f63, min is neither %f63 nor %f62. The divisor %f63-%f61 is only used
	// when %f61 < %f63.
	setp.lt.ftz.f32 	%p131, %f61, %f63;
	@!%p131 bra 	$Lt_123_309506;
	.loc	22	204	0
	// S = %f1534 = clamp01(max3(C) - min3(C)).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f62 slot) = S*(%f62-%f61)/(%f63-%f61); max (%f63 slot) = S.
	sub.ftz.f32 	%f1547, %f62, %f61;
	mul.ftz.f32 	%f1548, %f1534, %f1547;
	sub.ftz.f32 	%f1549, %f63, %f61;
	div.approx.ftz.f32 	%f1539, %f1548, %f1549;
	.loc	22	205	0
	mov.f32 	%f1543, %f1534;
	bra.uni 	$Lt_123_309250;
$Lt_123_309506:
	.loc	22	209	0
	mov.f32 	%f1543, 0f00000000;  	// 0
	mov.f32 	%f1539, 0f00000000;  	// 0
$Lt_123_309250:
	.loc	22	211	0
	// Minimum slot (%f61) becomes 0.
	mov.f32 	%f1538, 0f00000000;  	// 0
	bra.uni 	$Lt_123_309762;
$Lt_123_307970:
	// Neither min nor max is %f63, so %f63 is the middle channel.
	@!%p128 bra 	$Lt_123_310018;
	// min = %f62. The divisor %f61-%f62 is only used when %f61 > %f62.
	setp.gt.ftz.f32 	%p132, %f61, %f62;
	@!%p132 bra 	$Lt_123_310530;
	.loc	22	220	0
	// S = %f1534 = clamp01(max3(C) - min3(C)).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f63 slot) = S*(%f63-%f62)/(%f61-%f62); max (%f61 slot) = S.
	sub.ftz.f32 	%f1550, %f63, %f62;
	mul.ftz.f32 	%f1551, %f1534, %f1550;
	sub.ftz.f32 	%f1552, %f61, %f62;
	div.approx.ftz.f32 	%f1543, %f1551, %f1552;
	.loc	22	221	0
	mov.f32 	%f1538, %f1534;
	bra.uni 	$Lt_123_310274;
$Lt_123_310530:
	.loc	22	225	0
	mov.f32 	%f1538, 0f00000000;  	// 0
	mov.f32 	%f1543, 0f00000000;  	// 0
$Lt_123_310274:
	// Minimum slot (%f62) becomes 0.
	mov.f32 	%f1539, 0f00000000;  	// 0
	bra.uni 	$Lt_123_309762;
$Lt_123_310018:
	// Remaining ordering: %f61 treated as min, %f62 as max; the divisor %f62-%f61
	// is only used when %f61 < %f62 (%p111 from the top of this arm).
	@!%p111 bra 	$Lt_123_311042;
	.loc	22	233	0
	// S = %f1534 = clamp01(max3(C) - min3(C)).
	setp.lt.ftz.f32 	%p106, %f57, %f58;
	max.ftz.f32 	%f1434, %f57, %f58;
	selp.f32 	%f1435, %f57, %f58, %p106;
	max.ftz.f32 	%f1436, %f1434, %f59;
	setp.lt.ftz.f32 	%p107, %f1435, %f59;
	selp.f32 	%f1437, %f1435, %f59, %p107;
	sub.ftz.f32 	%f1533, %f1436, %f1437;
	cvt.ftz.sat.f32.f32 	%f1534, %f1533;
	// middle (%f63 slot) = S*(%f63-%f61)/(%f62-%f61); max (%f62 slot) = S.
	sub.ftz.f32 	%f1553, %f63, %f61;
	mul.ftz.f32 	%f1554, %f1534, %f1553;
	sub.ftz.f32 	%f1555, %f62, %f61;
	div.approx.ftz.f32 	%f1543, %f1554, %f1555;
	.loc	22	234	0
	mov.f32 	%f1539, %f1534;
	bra.uni 	$Lt_123_310786;
$Lt_123_311042:
	.loc	22	238	0
	mov.f32 	%f1543, 0f00000000;  	// 0
	mov.f32 	%f1539, 0f00000000;  	// 0
$Lt_123_310786:
	.loc	22	240	0
	// Minimum slot (%f61) becomes 0.
	mov.f32 	%f1538, 0f00000000;  	// 0
$Lt_123_309762:
$Lt_123_307714:
$Lt_123_305666:
	.loc	22	113	0
	// Stage 2. Weighted sums with w0 = table[+0] (%f522), w1 = table[+4] (%f523),
	// w2 = table[+8] (%f521):
	//   %f1560 = w2*%f61 + w0*%f63 + w1*%f62          (for D)
	//   %f1561 = w2*%f1538 + w0*%f1543 + w1*%f1539     (for T)
	// NOTE(review): table name suggests Rec.601 luma weights -- confirm.
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1556, %f62, %f523;
	mul.ftz.f32 	%f1557, %f1539, %f523;
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1558, %f522, %f63, %f1556;
	fma.rn.ftz.f32 	%f1559, %f522, %f1543, %f1557;
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1560, %f521, %f61, %f1558;
	fma.rn.ftz.f32 	%f1561, %f521, %f1538, %f1559;
	// d = clamp01(sum(D)) - clamp01(sum(T)); add d to every channel of T.
	// %f1566/%f1568/%f1570 are the working copies that the clip steps may overwrite.
	cvt.ftz.sat.f32.f32 	%f1562, %f1560;
	cvt.ftz.sat.f32.f32 	%f1563, %f1561;
	sub.ftz.f32 	%f1564, %f1562, %f1563;
	add.ftz.f32 	%f1565, %f1564, %f1538;
	mov.f32 	%f1566, %f1565;
	add.ftz.f32 	%f1567, %f1564, %f1539;
	mov.f32 	%f1568, %f1567;
	add.ftz.f32 	%f1569, %f1564, %f1543;
	mov.f32 	%f1570, %f1569;
	.loc	22	50	0
	// L = %f1574 = clamp01(weighted sum of the shifted channels).
	mul.ftz.f32 	%f1571, %f1567, %f523;
	fma.rn.ftz.f32 	%f1572, %f522, %f1569, %f1571;
	fma.rn.ftz.f32 	%f1573, %f521, %f1565, %f1572;
	cvt.ftz.sat.f32.f32 	%f1574, %f1573;
	.loc	22	116	0
	// n = %f1576 = minimum of the three shifted channels.
	setp.gt.ftz.f32 	%p133, %f1567, %f1565;
	selp.f32 	%f1575, %f1565, %f1567, %p133;
	setp.lt.ftz.f32 	%p134, %f1575, %f1569;
	selp.f32 	%f1576, %f1575, %f1569, %p134;
	mov.f32 	%f1577, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p135, %f1576, %f1577;
	@!%p135 bra 	$Lt_123_311298;
	// n < 0: each channel c becomes L + L*(c - L)/(L - n).
	.loc	22	119	0
	sub.ftz.f32 	%f1578, %f1574, %f1576;
	sub.ftz.f32 	%f1579, %f1569, %f1574;
	mul.ftz.f32 	%f1580, %f1574, %f1579;
	div.approx.ftz.f32 	%f1581, %f1580, %f1578;
	add.ftz.f32 	%f1570, %f1574, %f1581;
	.loc	22	120	0
	sub.ftz.f32 	%f1582, %f1567, %f1574;
	mul.ftz.f32 	%f1583, %f1574, %f1582;
	div.approx.ftz.f32 	%f1584, %f1583, %f1578;
	add.ftz.f32 	%f1568, %f1574, %f1584;
	.loc	22	121	0
	sub.ftz.f32 	%f1585, %f1565, %f1574;
	mul.ftz.f32 	%f1586, %f1574, %f1585;
	div.approx.ftz.f32 	%f1587, %f1586, %f1578;
	add.ftz.f32 	%f1566, %f1574, %f1587;
$Lt_123_311298:
	// x = %f1589 = maximum of the three shifted channels, taken from the values
	// before the n < 0 adjustment above.
	max.ftz.f32 	%f1588, %f1567, %f1565;
	max.ftz.f32 	%f1589, %f1588, %f1569;
	mov.f32 	%f1590, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p136, %f1589, %f1590;
	@!%p136 bra 	$Lt_123_311810;
	// x > 1: each (possibly already adjusted) channel c becomes
	// L + (1 - L)*(c - L)/(x - L).
	.loc	27	529	0
	mov.f32 	%f1591, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1592, %f1591, %f1574;
	sub.ftz.f32 	%f1593, %f1589, %f1574;
	sub.ftz.f32 	%f1594, %f1570, %f1574;
	mul.ftz.f32 	%f1595, %f1592, %f1594;
	div.approx.ftz.f32 	%f1596, %f1595, %f1593;
	.loc	22	125	0
	add.ftz.f32 	%f1570, %f1596, %f1574;
	.loc	27	529	0
	sub.ftz.f32 	%f1597, %f1568, %f1574;
	mul.ftz.f32 	%f1598, %f1592, %f1597;
	div.approx.ftz.f32 	%f1599, %f1598, %f1593;
	.loc	22	126	0
	add.ftz.f32 	%f1568, %f1599, %f1574;
	.loc	27	529	0
	sub.ftz.f32 	%f1600, %f1566, %f1574;
	mul.ftz.f32 	%f1601, %f1592, %f1600;
	div.approx.ftz.f32 	%f1602, %f1601, %f1593;
	.loc	22	127	0
	add.ftz.f32 	%f1566, %f1602, %f1574;
$Lt_123_311810:
	.loc	22	468	0
	// Stage 3. a = inAlphaGain * %f60 ;  A = a + %f64 - a*%f64
	ld.param.f32 	%f1603, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1603, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1604, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1604;
	mov.f32 	%f1605, %f190;
	// If A - 8e-6 <= 0 the result is forced to all zeros (no reciprocal is taken).
	mov.f32 	%f1606, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1607, %f190, %f1606;
	mov.f32 	%f1608, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p137, %f1607, %f1608;
	@!%p137 bra 	$Lt_123_312578;
	mov.f32 	%f1609, 0f00000000;  	// 0
	mov.f32 	%f1610, 0f00000000;  	// 0
	mov.f32 	%f1611, 0f00000000;  	// 0
	mov.f32 	%f1605, 0f00000000;  	// 0
	bra.uni 	$Lt_123_312322;
$Lt_123_312578:
	// Shared factors: %f199 = 1 - %f64, %f201 = a/A (via approximate reciprocal),
	// %f204 = 1 - a/A.
	mov.f32 	%f1612, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1612, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1613, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1614, %f187, %f200;
	sub.ftz.f32 	%f204, %f1613, %f1614;
	// With R the stage-2 result for a channel, the blended colour is used for both
	// terms: out = D*(1 - a/A) + (a/A)*(R*(1 - %f64) + R*%f64).
	mul.ftz.f32 	%f1615, %f199, %f1566;
	fma.rn.ftz.f32 	%f1616, %f1566, %f64, %f1615;
	mul.ftz.f32 	%f1617, %f201, %f1616;
	fma.rn.ftz.f32 	%f1611, %f61, %f204, %f1617;
	mul.ftz.f32 	%f1618, %f199, %f1568;
	fma.rn.ftz.f32 	%f1619, %f1568, %f64, %f1618;
	mul.ftz.f32 	%f1620, %f201, %f1619;
	fma.rn.ftz.f32 	%f1610, %f62, %f204, %f1620;
	mul.ftz.f32 	%f1621, %f199, %f1570;
	fma.rn.ftz.f32 	%f1622, %f1570, %f64, %f1621;
	mul.ftz.f32 	%f1623, %f201, %f1622;
	fma.rn.ftz.f32 	%f1609, %f63, %f204, %f1623;
$Lt_123_312322:
	// Join of the zero path and the blended path: publish the result in %f57..%f60.
	.loc	6	214	0
	mov.f32 	%f57, %f1611;
	mov.f32 	%f58, %f1610;
	mov.f32 	%f59, %f1609;
	mov.f32 	%f60, %f1605;
	bra.uni 	$Lt_123_315906;
// Blend case $Lt_123_6658 (the branch that selects it is outside this excerpt).
// Y(a, b, c) below denotes the weighted sum  k8*a + k4*b + k0*c, with k0/k4/k8
// read from kRGB32f_To_601YPbPr at byte offsets 0/4/8 (presumably Rec.601 luma
// weights, going by the table name - confirm against the table definition).
// Steps: shift (%f57,%f58,%f59) by sat(Y(%f61,%f62,%f63)) - sat(Y(%f57,%f58,%f59)),
// rescale the shifted triple around its own Y when it leaves [0, 1], then
// composite it against (%f61..%f64) and write the result to %f57..%f60.
$Lt_123_6658:
	.loc	22	113	0
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1624, %f58, %f523;
	mul.ftz.f32 	%f1625, %f62, %f523;
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1626, %f522, %f59, %f1624;
	fma.rn.ftz.f32 	%f1627, %f522, %f63, %f1625;
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1628, %f521, %f57, %f1626;
	fma.rn.ftz.f32 	%f1629, %f521, %f61, %f1627;
	// Both sums are clamped to [0, 1] (cvt.sat) before being subtracted.
	cvt.ftz.sat.f32.f32 	%f1630, %f1628;
	cvt.ftz.sat.f32.f32 	%f1631, %f1629;
	sub.ftz.f32 	%f1632, %f1631, %f1630;
	// Shifted triple: %f1633/%f1635/%f1637 keep the unmodified values, while
	// %f1634/%f1636/%f1638 are the working copies that the rescales overwrite.
	add.ftz.f32 	%f1633, %f1632, %f57;
	mov.f32 	%f1634, %f1633;
	add.ftz.f32 	%f1635, %f1632, %f58;
	mov.f32 	%f1636, %f1635;
	add.ftz.f32 	%f1637, %f1632, %f59;
	mov.f32 	%f1638, %f1637;
	.loc	22	50	0
	// L = %f1642 = sat(Y(shifted triple)).
	mul.ftz.f32 	%f1639, %f1635, %f523;
	fma.rn.ftz.f32 	%f1640, %f522, %f1637, %f1639;
	fma.rn.ftz.f32 	%f1641, %f521, %f1633, %f1640;
	cvt.ftz.sat.f32.f32 	%f1642, %f1641;
	.loc	22	116	0
	// %f1644 = min of the three shifted components.
	setp.gt.ftz.f32 	%p138, %f1635, %f1633;
	selp.f32 	%f1643, %f1633, %f1635, %p138;
	setp.lt.ftz.f32 	%p139, %f1643, %f1637;
	selp.f32 	%f1644, %f1643, %f1637, %p139;
	mov.f32 	%f1645, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p140, %f1644, %f1645;
	@!%p140 bra 	$Lt_123_312834;
	// min < 0: each component becomes  L + L*(c - L)/(L - min)  (approximate divide).
	.loc	22	119	0
	sub.ftz.f32 	%f1646, %f1642, %f1644;
	sub.ftz.f32 	%f1647, %f1637, %f1642;
	mul.ftz.f32 	%f1648, %f1642, %f1647;
	div.approx.ftz.f32 	%f1649, %f1648, %f1646;
	add.ftz.f32 	%f1638, %f1642, %f1649;
	.loc	22	120	0
	sub.ftz.f32 	%f1650, %f1635, %f1642;
	mul.ftz.f32 	%f1651, %f1642, %f1650;
	div.approx.ftz.f32 	%f1652, %f1651, %f1646;
	add.ftz.f32 	%f1636, %f1642, %f1652;
	.loc	22	121	0
	sub.ftz.f32 	%f1653, %f1633, %f1642;
	mul.ftz.f32 	%f1654, %f1642, %f1653;
	div.approx.ftz.f32 	%f1655, %f1654, %f1646;
	add.ftz.f32 	%f1634, %f1642, %f1655;
$Lt_123_312834:
	// %f1657 = max of the three shifted components. Note it is taken from the
	// unmodified values (%f1633/%f1635/%f1637), not from the working copies.
	max.ftz.f32 	%f1656, %f1635, %f1633;
	max.ftz.f32 	%f1657, %f1656, %f1637;
	mov.f32 	%f1658, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p141, %f1657, %f1658;
	@!%p141 bra 	$Lt_123_313346;
	// max > 1: each working copy c becomes  L + (1 - L)*(c - L)/(max - L).
	.loc	27	529	0
	mov.f32 	%f1659, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1660, %f1659, %f1642;
	sub.ftz.f32 	%f1661, %f1657, %f1642;
	sub.ftz.f32 	%f1662, %f1638, %f1642;
	mul.ftz.f32 	%f1663, %f1660, %f1662;
	div.approx.ftz.f32 	%f1664, %f1663, %f1661;
	.loc	22	125	0
	add.ftz.f32 	%f1638, %f1664, %f1642;
	.loc	27	529	0
	sub.ftz.f32 	%f1665, %f1636, %f1642;
	mul.ftz.f32 	%f1666, %f1660, %f1665;
	div.approx.ftz.f32 	%f1667, %f1666, %f1661;
	.loc	22	126	0
	add.ftz.f32 	%f1636, %f1667, %f1642;
	.loc	27	529	0
	sub.ftz.f32 	%f1668, %f1634, %f1642;
	mul.ftz.f32 	%f1669, %f1660, %f1668;
	div.approx.ftz.f32 	%f1670, %f1669, %f1661;
	.loc	22	127	0
	add.ftz.f32 	%f1634, %f1670, %f1642;
$Lt_123_313346:
	.loc	22	468	0
	// a = inAlphaGain * %f60;  outA = %f190 = a + %f64 - a*%f64.
	ld.param.f32 	%f1671, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1671, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1672, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1672;
	mov.f32 	%f1673, %f190;
	// outA - 8e-6 <= 0: the whole result (all four components) is forced to zero,
	// which also avoids taking the reciprocal of a (near-)zero outA below.
	mov.f32 	%f1674, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1675, %f190, %f1674;
	mov.f32 	%f1676, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p142, %f1675, %f1676;
	@!%p142 bra 	$Lt_123_314114;
	mov.f32 	%f1677, 0f00000000;  	// 0
	mov.f32 	%f1678, 0f00000000;  	// 0
	mov.f32 	%f1679, 0f00000000;  	// 0
	mov.f32 	%f1673, 0f00000000;  	// 0
	bra.uni 	$Lt_123_313858;
$Lt_123_314114:
	// k = %f201 = a / outA (via approximate reciprocal); %f204 = 1 - k.
	// Per component:  out = %f6x*(1 - k) + k*(c*%f64 + (1 - %f64)*c),
	// with c the working copy and %f6x the matching one of %f61/%f62/%f63.
	mov.f32 	%f1680, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1680, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1681, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1682, %f187, %f200;
	sub.ftz.f32 	%f204, %f1681, %f1682;
	mul.ftz.f32 	%f1683, %f199, %f1634;
	fma.rn.ftz.f32 	%f1684, %f1634, %f64, %f1683;
	mul.ftz.f32 	%f1685, %f201, %f1684;
	fma.rn.ftz.f32 	%f1679, %f61, %f204, %f1685;
	mul.ftz.f32 	%f1686, %f199, %f1636;
	fma.rn.ftz.f32 	%f1687, %f1636, %f64, %f1686;
	mul.ftz.f32 	%f1688, %f201, %f1687;
	fma.rn.ftz.f32 	%f1678, %f62, %f204, %f1688;
	mul.ftz.f32 	%f1689, %f199, %f1638;
	fma.rn.ftz.f32 	%f1690, %f1638, %f64, %f1689;
	mul.ftz.f32 	%f1691, %f201, %f1690;
	fma.rn.ftz.f32 	%f1677, %f63, %f204, %f1691;
$Lt_123_313858:
	.loc	6	215	0
	// Publish the result in %f57..%f60 and jump to the common join label.
	mov.f32 	%f57, %f1679;
	mov.f32 	%f58, %f1678;
	mov.f32 	%f59, %f1677;
	mov.f32 	%f60, %f1673;
	bra.uni 	$Lt_123_315906;
// Blend case $Lt_123_6914 (the branch that selects it is outside this excerpt).
// Y(a, b, c) below denotes the weighted sum  k8*a + k4*b + k0*c, with k0/k4/k8
// read from kRGB32f_To_601YPbPr at byte offsets 0/4/8 (presumably Rec.601 luma
// weights, going by the table name - confirm against the table definition).
// Steps: shift (%f61,%f62,%f63) by sat(Y(%f57,%f58,%f59)) - sat(Y(%f61,%f62,%f63)),
// rescale the shifted triple around its own Y when it leaves [0, 1], then
// composite it against (%f61..%f64) and write the result to %f57..%f60.
$Lt_123_6914:
	.loc	22	113	0
	ld.const.f32 	%f523, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1692, %f58, %f523;
	mul.ftz.f32 	%f1693, %f62, %f523;
	ld.const.f32 	%f522, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1694, %f522, %f59, %f1692;
	fma.rn.ftz.f32 	%f1695, %f522, %f63, %f1693;
	ld.const.f32 	%f521, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1696, %f521, %f57, %f1694;
	fma.rn.ftz.f32 	%f1697, %f521, %f61, %f1695;
	// Both sums are clamped to [0, 1] (cvt.sat) before being subtracted.
	cvt.ftz.sat.f32.f32 	%f1698, %f1696;
	cvt.ftz.sat.f32.f32 	%f1699, %f1697;
	sub.ftz.f32 	%f1700, %f1698, %f1699;
	// Shifted triple: %f1701/%f1703/%f1705 keep the unmodified values, while
	// %f1702/%f1704/%f1706 are the working copies that the rescales overwrite.
	add.ftz.f32 	%f1701, %f1700, %f61;
	mov.f32 	%f1702, %f1701;
	add.ftz.f32 	%f1703, %f1700, %f62;
	mov.f32 	%f1704, %f1703;
	add.ftz.f32 	%f1705, %f1700, %f63;
	mov.f32 	%f1706, %f1705;
	.loc	22	50	0
	// L = %f1710 = sat(Y(shifted triple)).
	mul.ftz.f32 	%f1707, %f1703, %f523;
	fma.rn.ftz.f32 	%f1708, %f522, %f1705, %f1707;
	fma.rn.ftz.f32 	%f1709, %f521, %f1701, %f1708;
	cvt.ftz.sat.f32.f32 	%f1710, %f1709;
	.loc	22	116	0
	// %f1712 = min of the three shifted components.
	setp.gt.ftz.f32 	%p143, %f1703, %f1701;
	selp.f32 	%f1711, %f1701, %f1703, %p143;
	setp.lt.ftz.f32 	%p144, %f1711, %f1705;
	selp.f32 	%f1712, %f1711, %f1705, %p144;
	mov.f32 	%f1713, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p145, %f1712, %f1713;
	@!%p145 bra 	$Lt_123_314370;
	// min < 0: each component becomes  L + L*(c - L)/(L - min)  (approximate divide).
	.loc	22	119	0
	sub.ftz.f32 	%f1714, %f1710, %f1712;
	sub.ftz.f32 	%f1715, %f1705, %f1710;
	mul.ftz.f32 	%f1716, %f1710, %f1715;
	div.approx.ftz.f32 	%f1717, %f1716, %f1714;
	add.ftz.f32 	%f1706, %f1710, %f1717;
	.loc	22	120	0
	sub.ftz.f32 	%f1718, %f1703, %f1710;
	mul.ftz.f32 	%f1719, %f1710, %f1718;
	div.approx.ftz.f32 	%f1720, %f1719, %f1714;
	add.ftz.f32 	%f1704, %f1710, %f1720;
	.loc	22	121	0
	sub.ftz.f32 	%f1721, %f1701, %f1710;
	mul.ftz.f32 	%f1722, %f1710, %f1721;
	div.approx.ftz.f32 	%f1723, %f1722, %f1714;
	add.ftz.f32 	%f1702, %f1710, %f1723;
$Lt_123_314370:
	// %f1725 = max of the three shifted components. Note it is taken from the
	// unmodified values (%f1701/%f1703/%f1705), not from the working copies.
	max.ftz.f32 	%f1724, %f1703, %f1701;
	max.ftz.f32 	%f1725, %f1724, %f1705;
	mov.f32 	%f1726, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p146, %f1725, %f1726;
	@!%p146 bra 	$Lt_123_314882;
	// max > 1: each working copy c becomes  L + (1 - L)*(c - L)/(max - L).
	.loc	27	529	0
	mov.f32 	%f1727, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1728, %f1727, %f1710;
	sub.ftz.f32 	%f1729, %f1725, %f1710;
	sub.ftz.f32 	%f1730, %f1706, %f1710;
	mul.ftz.f32 	%f1731, %f1728, %f1730;
	div.approx.ftz.f32 	%f1732, %f1731, %f1729;
	.loc	22	125	0
	add.ftz.f32 	%f1706, %f1732, %f1710;
	.loc	27	529	0
	sub.ftz.f32 	%f1733, %f1704, %f1710;
	mul.ftz.f32 	%f1734, %f1728, %f1733;
	div.approx.ftz.f32 	%f1735, %f1734, %f1729;
	.loc	22	126	0
	add.ftz.f32 	%f1704, %f1735, %f1710;
	.loc	27	529	0
	sub.ftz.f32 	%f1736, %f1702, %f1710;
	mul.ftz.f32 	%f1737, %f1728, %f1736;
	div.approx.ftz.f32 	%f1738, %f1737, %f1729;
	.loc	22	127	0
	add.ftz.f32 	%f1702, %f1738, %f1710;
$Lt_123_314882:
	.loc	22	468	0
	// a = inAlphaGain * %f60;  outA = %f190 = a + %f64 - a*%f64.
	ld.param.f32 	%f1739, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f187, %f1739, %f60;
	add.ftz.f32 	%f188, %f187, %f64;
	mul.ftz.f32 	%f1740, %f187, %f64;
	sub.ftz.f32 	%f190, %f188, %f1740;
	mov.f32 	%f1741, %f190;
	// outA - 8e-6 <= 0: the whole result (all four components) is forced to zero,
	// which also avoids taking the reciprocal of a (near-)zero outA below.
	mov.f32 	%f1742, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1743, %f190, %f1742;
	mov.f32 	%f1744, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p147, %f1743, %f1744;
	@!%p147 bra 	$Lt_123_315650;
	mov.f32 	%f1745, 0f00000000;  	// 0
	mov.f32 	%f1746, 0f00000000;  	// 0
	mov.f32 	%f1747, 0f00000000;  	// 0
	mov.f32 	%f1741, 0f00000000;  	// 0
	bra.uni 	$Lt_123_315394;
$Lt_123_315650:
	// k = %f201 = a / outA (via approximate reciprocal); %f204 = 1 - k.
	// Per component:  out = %f6x*(1 - k) + k*(c*%f64 + (1 - %f64)*c),
	// with c the working copy and %f6x the matching one of %f61/%f62/%f63.
	mov.f32 	%f1748, 0f3f800000;  	// 1
	sub.ftz.f32 	%f199, %f1748, %f64;
	rcp.approx.ftz.f32 	%f200, %f190;
	mul.ftz.f32 	%f201, %f200, %f187;
	mov.f32 	%f1749, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1750, %f187, %f200;
	sub.ftz.f32 	%f204, %f1749, %f1750;
	mul.ftz.f32 	%f1751, %f199, %f1702;
	fma.rn.ftz.f32 	%f1752, %f1702, %f64, %f1751;
	mul.ftz.f32 	%f1753, %f201, %f1752;
	fma.rn.ftz.f32 	%f1747, %f61, %f204, %f1753;
	mul.ftz.f32 	%f1754, %f199, %f1704;
	fma.rn.ftz.f32 	%f1755, %f1704, %f64, %f1754;
	mul.ftz.f32 	%f1756, %f201, %f1755;
	fma.rn.ftz.f32 	%f1746, %f62, %f204, %f1756;
	mul.ftz.f32 	%f1757, %f199, %f1706;
	fma.rn.ftz.f32 	%f1758, %f1706, %f64, %f1757;
	mul.ftz.f32 	%f1759, %f201, %f1758;
	fma.rn.ftz.f32 	%f1745, %f63, %f204, %f1759;
$Lt_123_315394:
	.loc	6	216	0
	// Publish the result in %f57..%f60 and jump to the common join label.
	mov.f32 	%f57, %f1747;
	mov.f32 	%f58, %f1746;
	mov.f32 	%f59, %f1745;
	mov.f32 	%f60, %f1741;
	bra.uni 	$Lt_123_315906;
// $Lt_123_271618: when %p10 is set (it is computed outside this excerpt), replace
// %f60 with inAlphaGain * saturate(%f60); otherwise %f60 is left untouched.
// %f57..%f59 are not modified on this path.
$Lt_123_271618:
	.loc	6	218	0
	@!%p10 bra 	$Lt_123_315906;
	.loc	6	226	0
	// cvt.sat clamps %f60 to [0, 1] before the gain is applied.
	cvt.ftz.sat.f32.f32 	%f1760, %f60;
	.loc	6	243	0
	ld.param.f32 	%f1761, [__cudaparm_MotionKernel_inAlphaGain];
	mul.ftz.f32 	%f60, %f1761, %f1760;
	// Join label: the blend cases preceding this block end with a branch to here.
$Lt_123_315906:
// Output stage of MotionKernel: store the four components %f57..%f60 and exit.
// %p8 selects the element format; %rd8 is the element index and %rd9 the base
// address. All three are set earlier in the kernel, outside this excerpt.
$Lt_123_271362:
	@!%p8 bra 	$Lt_123_316674;
	.loc	21	126	0
	// %p8 set: 8-byte elements. Each component is converted to f16 with
	// round-to-nearest and the four halves are written by one v4.u16 store.
	mul.lo.u64 	%rd14, %rd8, 8;
	add.u64 	%rd15, %rd9, %rd14;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f57;
	mov.b32		%r122, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f58;
	mov.b32		%r123, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f59;
	mov.b32		%r124, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f60;
	mov.b32		%r125, %b1; }
	st.global.v4.u16 	[%rd15+0], {%r122,%r123,%r124,%r125};
	.loc	6	246	0
	bra.uni 	$Lt_123_317954;
$Lt_123_316674:
	.loc	21	126	0
	// %p8 clear: 16-byte elements. The four f32 components are stored unconverted.
	mul.lo.u64 	%rd16, %rd8, 16;
	add.u64 	%rd17, %rd9, %rd16;
	st.global.v4.f32 	[%rd17+0], {%f57,%f58,%f59,%f60};
$Lt_123_317954:
$L_123_268290:
	.loc	6	335	0
	exit;
$LDWend_MotionKernel:
	} // MotionKernel

	// TranslateKernel entry. The part shown up to $Lt_124_268290 does three things:
	//   1. derives this thread's source coordinate (x, y) = (%r5, %r11) from the
	//      block/thread indices and rejects it if it lies outside the source size;
	//   2. derives the translated coordinate (%r14, %r18) = (x + inTranslateX,
	//      y + inTranslateY) and rejects it if it lies outside the destination size;
	//   3. loads the source element at (x, y) into %f1..%f4 as four f32 values.
	// Every rejection branches to $Lt_124_315394, which is defined beyond this excerpt.
	.entry TranslateKernel (
		.param .u64 __cudaparm_TranslateKernel_inSrc,
		.param .s32 __cudaparm_TranslateKernel_inSrcPitch,
		.param .s32 __cudaparm_TranslateKernel_inSrcWidth,
		.param .s32 __cudaparm_TranslateKernel_inSrcHeight,
		.param .u64 __cudaparm_TranslateKernel_inDest,
		.param .s32 __cudaparm_TranslateKernel_inDestPitch,
		.param .s32 __cudaparm_TranslateKernel_inDestWidth,
		.param .s32 __cudaparm_TranslateKernel_inDestHeight,
		.param .u32 __cudaparm_TranslateKernel_inDeviceFormat,
		.param .s32 __cudaparm_TranslateKernel_inTranslateX,
		.param .s32 __cudaparm_TranslateKernel_inTranslateY,
		.param .f32 __cudaparm_TranslateKernel_inAlphaGain,
		.param .u32 __cudaparm_TranslateKernel_inBlendMode,
		.param .s8 __cudaparm_TranslateKernel_inDoCompositeOver)
	{
	.reg .u32 %r<122>;
	.reg .u64 %rd<18>;
	.reg .f32 %f<1711>;
	.reg .pred %p<147>;
	.loc	6	355	0
$LDWbegin_TranslateKernel:
	// x = %r5 = ctaid.x * ntid.x + tid.x; reject when inSrcWidth <= x.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	ld.param.s32 	%r6, [__cudaparm_TranslateKernel_inSrcWidth];
	setp.le.s32 	%p1, %r6, %r5;
	@%p1 bra 	$Lt_124_315394;
	// y = %r11 = ctaid.y * ntid.y + tid.y; reject when inSrcHeight <= y.
	cvt.s32.u32 	%r7, %ctaid.y;
	cvt.s32.u32 	%r8, %ntid.y;
	mul.lo.s32 	%r9, %r7, %r8;
	mov.u32 	%r10, %tid.y;
	add.u32 	%r11, %r9, %r10;
	ld.param.s32 	%r12, [__cudaparm_TranslateKernel_inSrcHeight];
	setp.le.s32 	%p2, %r12, %r11;
	@%p2 bra 	$Lt_124_315394;
	// Translated x = %r14; reject when it is < 0 or >= inDestWidth.
	ld.param.s32 	%r13, [__cudaparm_TranslateKernel_inTranslateX];
	add.s32 	%r14, %r13, %r5;
	mov.u32 	%r15, 0;
	setp.lt.s32 	%p3, %r14, %r15;
	@%p3 bra 	$Lt_124_315394;
	ld.param.s32 	%r16, [__cudaparm_TranslateKernel_inDestWidth];
	setp.ge.s32 	%p4, %r14, %r16;
	@%p4 bra 	$Lt_124_315394;
	// Translated y = %r18; reject when it is < 0 or inDestHeight <= it.
	ld.param.s32 	%r17, [__cudaparm_TranslateKernel_inTranslateY];
	add.s32 	%r18, %r17, %r11;
	mov.u32 	%r19, 0;
	setp.lt.s32 	%p5, %r18, %r19;
	@%p5 bra 	$Lt_124_315394;
	ld.param.s32 	%r20, [__cudaparm_TranslateKernel_inDestHeight];
	setp.le.s32 	%p6, %r20, %r18;
	@%p6 bra 	$Lt_124_315394;
	// %p7 = (inDeviceFormat == 0): selects the 8-byte (4 x f16) element layout;
	// any other value takes the 16-byte (4 x f32) path.
	ld.param.s32 	%r21, [__cudaparm_TranslateKernel_inDeviceFormat];
	mov.s32 	%r22, 0;
	setp.eq.s32 	%p7, %r21, %r22;
	// Source element index %rd1 = x + inSrcPitch * y. The pitch is counted in
	// elements here: the index is scaled to bytes (x8 or x16) only afterwards.
	ld.param.s32 	%r23, [__cudaparm_TranslateKernel_inSrcPitch];
	mul.lo.s32 	%r24, %r23, %r11;
	add.s32 	%r25, %r5, %r24;
	cvt.s64.s32 	%rd1, %r25;
	ld.param.u64 	%rd2, [__cudaparm_TranslateKernel_inSrc];
	@!%p7 bra 	$Lt_124_268546;
	.loc	21	115	0
	// f16 layout: one v4.u16 load, then widen each half to f32.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r26,%r27,%r28,%r29}, [%rd4+0];
	.loc	6	367	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r28;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r29;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_124_268290;
$Lt_124_268546:
	// f32 layout: one v4.f32 load, no conversion needed.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_124_268290:
	// Working copy of the source element: %f5..%f8 (the blend paths overwrite these).
	mov.f32 	%f5, %f1;
	mov.f32 	%f6, %f2;
	mov.f32 	%f7, %f3;
	mov.f32 	%f8, %f4;
	// %p8 = (inBlendMode == 18); %r30 keeps the raw blend mode for later compares.
	ld.param.u32 	%r30, [__cudaparm_TranslateKernel_inBlendMode];
	mov.s32 	%r31, 18;
	setp.eq.s32 	%p8, %r30, %r31;
	// Destination element index %rd7 = %r14 + inDestPitch * %r18, i.e. the
	// translated coordinate; %rd8 = destination base address.
	ld.param.s32 	%r32, [__cudaparm_TranslateKernel_inDestPitch];
	mul.lo.s32 	%r33, %r32, %r18;
	add.s32 	%r34, %r14, %r33;
	cvt.s64.s32 	%rd7, %r34;
	ld.param.u64 	%rd8, [__cudaparm_TranslateKernel_inDest];
	// inDoCompositeOver == 0: skip the destination read and all blending and
	// continue at $Lt_124_269058 (defined beyond this excerpt).
	ld.param.s8 	%r35, [__cudaparm_TranslateKernel_inDoCompositeOver];
	mov.u32 	%r36, 0;
	setp.eq.s32 	%p9, %r35, %r36;
	@%p9 bra 	$Lt_124_269058;
	// Load the existing destination element into %f9..%f12, using the same
	// format switch (%p7) as the source load: 4 x f16 widened to f32, or 4 x f32.
	@!%p7 bra 	$Lt_124_269570;
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r37,%r38,%r39,%r40}, [%rd10+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r37;
	cvt.ftz.f32.f16	%f9, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r38;
	cvt.ftz.f32.f16	%f10, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r39;
	cvt.ftz.f32.f16	%f11, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r40;
	cvt.ftz.f32.f16	%f12, %b1; }
	bra.uni 	$Lt_124_269314;
$Lt_124_269570:
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f9,%f10,%f11,%f12}, [%rd12+0];
$Lt_124_269314:
	// Blend mode 18 path (%p8 set; otherwise go to the dispatch at $Lt_124_270082).
	// spow(x, e) below means sign(x) * |x|^e, evaluated as ex2(e * lg2(|x|)) with
	// the approximate lg2/ex2 instructions; x < 0 takes the negate/negate branch.
	// Outline:
	//   1. %f19/%f29/%f39 = spow(%f5/%f6/%f7, 2.2)             (source components)
	//   2. a = sat(inAlphaGain * sat(%f8)), b = sat(%f12) * (1 - a), outA = a + b
	//   3. %f65/%f75/%f85 = spow(%f9/%f10/%f11, 2.2)           (destination components)
	//   4. mixed_c = (b * dst_c + a * src_c) / sat(outA)
	//   5. result_c = spow(mixed_c, 0.454545), written back to %f5..%f7; %f8 = sat(outA)
	// If outA (or sat(outA)) minus 8e-6 is <= 0 the corresponding values are zeroed.
	@!%p8 bra 	$Lt_124_270082;
	.loc	5	255	0
	// Step 1, first component: %f19 = spow(%f5, 2.2).
	mov.f32 	%f13, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p10, %f5, %f13;
	@!%p10 bra 	$Lt_124_270338;
	.loc	5	234	0
	neg.ftz.f32 	%f14, %f5;
	lg2.approx.ftz.f32 	%f15, %f14;
	mov.f32 	%f16, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f17, %f15, %f16;
	ex2.approx.ftz.f32 	%f18, %f17;
	neg.ftz.f32 	%f19, %f18;
	bra.uni 	$LDWendi___log2f_301_80;
$Lt_124_270338:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f20, %f5;
	mov.f32 	%f21, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f22, %f20, %f21;
	ex2.approx.ftz.f32 	%f19, %f22;
$LDWendi___log2f_301_80:
	.loc	5	256	0
	// Second component: %f29 = spow(%f6, 2.2).
	mov.f32 	%f23, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p11, %f6, %f23;
	@!%p11 bra 	$Lt_124_270850;
	.loc	5	234	0
	neg.ftz.f32 	%f24, %f6;
	lg2.approx.ftz.f32 	%f25, %f24;
	mov.f32 	%f26, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f27, %f25, %f26;
	ex2.approx.ftz.f32 	%f28, %f27;
	neg.ftz.f32 	%f29, %f28;
	bra.uni 	$LDWendi___log2f_301_78;
$Lt_124_270850:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f30, %f6;
	mov.f32 	%f31, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f32, %f30, %f31;
	ex2.approx.ftz.f32 	%f29, %f32;
$LDWendi___log2f_301_78:
	.loc	5	257	0
	// Third component: %f39 = spow(%f7, 2.2).
	mov.f32 	%f33, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p12, %f7, %f33;
	@!%p12 bra 	$Lt_124_271362;
	.loc	5	234	0
	neg.ftz.f32 	%f34, %f7;
	lg2.approx.ftz.f32 	%f35, %f34;
	mov.f32 	%f36, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f37, %f35, %f36;
	ex2.approx.ftz.f32 	%f38, %f37;
	neg.ftz.f32 	%f39, %f38;
	bra.uni 	$LDWendi___log2f_301_76;
$Lt_124_271362:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f40, %f7;
	mov.f32 	%f41, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f42, %f40, %f41;
	ex2.approx.ftz.f32 	%f39, %f42;
$LDWendi___log2f_301_76:
	.loc	22	83	0
	// Step 2: a = %f47 = sat(inAlphaGain * sat(%f8)); b = %f50 = sat(%f12) * (1 - a);
	// outA = %f51 = a + b.
	cvt.ftz.sat.f32.f32 	%f43, %f8;
	cvt.ftz.sat.f32.f32 	%f44, %f12;
	ld.param.f32 	%f45, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f46, %f45, %f43;
	cvt.ftz.sat.f32.f32 	%f47, %f46;
	mov.f32 	%f48, 0f3f800000;    	// 1
	sub.ftz.f32 	%f49, %f48, %f47;
	mul.ftz.f32 	%f50, %f44, %f49;
	add.ftz.f32 	%f51, %f47, %f50;
	// outA - 8e-6 <= 0: the result is all zeros and the remaining steps are skipped.
	mov.f32 	%f52, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f53, %f51, %f52;
	mov.f32 	%f54, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p13, %f53, %f54;
	@!%p13 bra 	$Lt_124_272130;
	mov.f32 	%f55, 0f00000000;    	// 0
	mov.f32 	%f56, 0f00000000;    	// 0
	mov.f32 	%f57, 0f00000000;    	// 0
	mov.f32 	%f58, 0f00000000;    	// 0
	bra.uni 	$Lt_124_271874;
$Lt_124_272130:
	// Step 3, first destination component: %f65 = spow(%f9, 2.2).
	mov.f32 	%f59, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f9, %f59;
	@!%p14 bra 	$Lt_124_272386;
	.loc	5	234	0
	neg.ftz.f32 	%f60, %f9;
	lg2.approx.ftz.f32 	%f61, %f60;
	mov.f32 	%f62, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f63, %f61, %f62;
	ex2.approx.ftz.f32 	%f64, %f63;
	neg.ftz.f32 	%f65, %f64;
	bra.uni 	$LDWendi___log2f_301_74;
$Lt_124_272386:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f66, %f9;
	mov.f32 	%f67, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f68, %f66, %f67;
	ex2.approx.ftz.f32 	%f65, %f68;
$LDWendi___log2f_301_74:
	.loc	22	97	0
	// Second destination component: %f75 = spow(%f10, 2.2).
	mov.f32 	%f69, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f10, %f69;
	@!%p15 bra 	$Lt_124_272898;
	.loc	5	234	0
	neg.ftz.f32 	%f70, %f10;
	lg2.approx.ftz.f32 	%f71, %f70;
	mov.f32 	%f72, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f73, %f71, %f72;
	ex2.approx.ftz.f32 	%f74, %f73;
	neg.ftz.f32 	%f75, %f74;
	bra.uni 	$LDWendi___log2f_301_72;
$Lt_124_272898:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f76, %f10;
	mov.f32 	%f77, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f78, %f76, %f77;
	ex2.approx.ftz.f32 	%f75, %f78;
$LDWendi___log2f_301_72:
	.loc	22	98	0
	// Third destination component: %f85 = spow(%f11, 2.2).
	mov.f32 	%f79, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p16, %f11, %f79;
	@!%p16 bra 	$Lt_124_273410;
	.loc	5	234	0
	neg.ftz.f32 	%f80, %f11;
	lg2.approx.ftz.f32 	%f81, %f80;
	mov.f32 	%f82, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f83, %f81, %f82;
	ex2.approx.ftz.f32 	%f84, %f83;
	neg.ftz.f32 	%f85, %f84;
	bra.uni 	$LDWendi___log2f_301_70;
$Lt_124_273410:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f86, %f11;
	mov.f32 	%f87, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f88, %f86, %f87;
	ex2.approx.ftz.f32 	%f85, %f88;
$LDWendi___log2f_301_70:
	.loc	5	208	0
	// Step 4: %f89 = sat(outA), kept in %f90 as the output fourth component.
	// sat(outA) - 8e-6 <= 0 zeroes the mixed values instead of dividing.
	cvt.ftz.sat.f32.f32 	%f89, %f51;
	mov.f32 	%f90, %f89;
	mov.f32 	%f91, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f92, %f89, %f91;
	mov.f32 	%f93, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p17, %f92, %f93;
	@%p17 bra 	$Lt_124_274178;
	.loc	5	213	0
	// mixed = (b * dst + a * src) * (1 / sat(outA)), per component.
	// Note the register order: %f97 pairs %f39/%f85 (third component), %f100
	// pairs %f29/%f75 (second), %f103 pairs %f19/%f65 (first).
	rcp.approx.ftz.f32 	%f94, %f89;
	mul.ftz.f32 	%f95, %f39, %f47;
	fma.rn.ftz.f32 	%f96, %f50, %f85, %f95;
	mul.ftz.f32 	%f97, %f94, %f96;
	.loc	5	214	0
	mul.ftz.f32 	%f98, %f29, %f47;
	fma.rn.ftz.f32 	%f99, %f50, %f75, %f98;
	mul.ftz.f32 	%f100, %f94, %f99;
	.loc	5	215	0
	mul.ftz.f32 	%f101, %f19, %f47;
	fma.rn.ftz.f32 	%f102, %f50, %f65, %f101;
	mul.ftz.f32 	%f103, %f94, %f102;
	bra.uni 	$Lt_124_273922;
$Lt_124_274178:
	.loc	5	219	0
	mov.f32 	%f97, 0f00000000;    	// 0
	mov.f32 	%f100, 0f00000000;   	// 0
	mov.f32 	%f103, 0f00000000;   	// 0
	mov.f32 	%f90, 0f00000000;    	// 0
$Lt_124_273922:
	.loc	5	266	0
	// Step 5, first component: %f110 = spow(%f103, 0.454545).
	mov.f32 	%f104, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p18, %f103, %f104;
	@!%p18 bra 	$Lt_124_274434;
	.loc	5	242	0
	neg.ftz.f32 	%f105, %f103;
	lg2.approx.ftz.f32 	%f106, %f105;
	mov.f32 	%f107, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f108, %f106, %f107;
	ex2.approx.ftz.f32 	%f109, %f108;
	neg.ftz.f32 	%f110, %f109;
	bra.uni 	$LDWendi___log2f_301_68;
$Lt_124_274434:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f111, %f103;
	mov.f32 	%f112, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f113, %f111, %f112;
	ex2.approx.ftz.f32 	%f110, %f113;
$LDWendi___log2f_301_68:
	.loc	5	267	0
	// Second component: %f120 = spow(%f100, 0.454545).
	mov.f32 	%f114, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p19, %f100, %f114;
	@!%p19 bra 	$Lt_124_274946;
	.loc	5	242	0
	neg.ftz.f32 	%f115, %f100;
	lg2.approx.ftz.f32 	%f116, %f115;
	mov.f32 	%f117, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f118, %f116, %f117;
	ex2.approx.ftz.f32 	%f119, %f118;
	neg.ftz.f32 	%f120, %f119;
	bra.uni 	$LDWendi___log2f_301_66;
$Lt_124_274946:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f121, %f100;
	mov.f32 	%f122, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f123, %f121, %f122;
	ex2.approx.ftz.f32 	%f120, %f123;
$LDWendi___log2f_301_66:
	.loc	5	268	0
	// Third component: %f130 = spow(%f97, 0.454545).
	mov.f32 	%f124, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p20, %f97, %f124;
	@!%p20 bra 	$Lt_124_275458;
	.loc	5	242	0
	neg.ftz.f32 	%f125, %f97;
	lg2.approx.ftz.f32 	%f126, %f125;
	mov.f32 	%f127, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f128, %f126, %f127;
	ex2.approx.ftz.f32 	%f129, %f128;
	neg.ftz.f32 	%f130, %f129;
	bra.uni 	$LDWendi___log2f_301_64;
$Lt_124_275458:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f131, %f97;
	mov.f32 	%f132, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f133, %f131, %f132;
	ex2.approx.ftz.f32 	%f130, %f133;
$LDWendi___log2f_301_64:
	.loc	22	101	0
	mov.f32 	%f58, %f110;
	mov.f32 	%f57, %f120;
	mov.f32 	%f56, %f130;
	mov.f32 	%f55, %f90;
$Lt_124_271874:
	.loc	6	170	0
	// Write the blended element back into the working registers %f5..%f8 and
	// continue at $Lt_124_313346 (defined beyond this excerpt).
	mov.f32 	%f5, %f58;
	mov.f32 	%f6, %f57;
	mov.f32 	%f7, %f56;
	mov.f32 	%f8, %f55;
	bra.uni 	$Lt_124_313346;
// Blend-mode dispatch for every mode other than 18. %r30 holds inBlendMode and is
// compared against one constant at a time, in the order below; the first match
// branches to that mode's handler:
//    3 -> $Lt_124_258    11 -> $Lt_124_770   17 -> $Lt_124_1026  22 -> $Lt_124_1282
//    6 -> $Lt_124_1538    1 -> $Lt_124_1794  13 -> $Lt_124_2050   4 -> $Lt_124_2306
//    2 -> $Lt_124_2562   14 -> $Lt_124_2818  12 -> $Lt_124_3074  19 -> $Lt_124_3330
//   23 -> $Lt_124_3586    8 -> $Lt_124_3842  24 -> $Lt_124_4098  15 -> $Lt_124_4354
//   20 -> $Lt_124_4610    9 -> $Lt_124_4866   5 -> $Lt_124_5122   7 -> $Lt_124_5378
//   25 -> $Lt_124_5634   26 -> $Lt_124_5890  10 -> $Lt_124_6146  21 -> $Lt_124_6402
//    0 -> $Lt_124_6658   16 -> $Lt_124_6914
// Any other value falls through to $Lt_124_313346 with %f5..%f8 unchanged.
$Lt_124_270082:
	.loc	6	189	0
	mov.u32 	%r41, 3;
	setp.eq.s32 	%p21, %r30, %r41;
	@%p21 bra 	$Lt_124_258;
	mov.u32 	%r42, 11;
	setp.eq.s32 	%p22, %r30, %r42;
	@%p22 bra 	$Lt_124_770;
	mov.u32 	%r43, 17;
	setp.eq.s32 	%p23, %r30, %r43;
	@%p23 bra 	$Lt_124_1026;
	mov.u32 	%r44, 22;
	setp.eq.s32 	%p24, %r30, %r44;
	@%p24 bra 	$Lt_124_1282;
	mov.u32 	%r45, 6;
	setp.eq.s32 	%p25, %r30, %r45;
	@%p25 bra 	$Lt_124_1538;
	mov.u32 	%r46, 1;
	setp.eq.s32 	%p26, %r30, %r46;
	@%p26 bra 	$Lt_124_1794;
	mov.u32 	%r47, 13;
	setp.eq.s32 	%p27, %r30, %r47;
	@%p27 bra 	$Lt_124_2050;
	mov.u32 	%r48, 4;
	setp.eq.s32 	%p28, %r30, %r48;
	@%p28 bra 	$Lt_124_2306;
	mov.u32 	%r49, 2;
	setp.eq.s32 	%p29, %r30, %r49;
	@%p29 bra 	$Lt_124_2562;
	mov.u32 	%r50, 14;
	setp.eq.s32 	%p30, %r30, %r50;
	@%p30 bra 	$Lt_124_2818;
	mov.u32 	%r51, 12;
	setp.eq.s32 	%p31, %r30, %r51;
	@%p31 bra 	$Lt_124_3074;
	mov.u32 	%r52, 19;
	setp.eq.s32 	%p32, %r30, %r52;
	@%p32 bra 	$Lt_124_3330;
	mov.u32 	%r53, 23;
	setp.eq.s32 	%p33, %r30, %r53;
	@%p33 bra 	$Lt_124_3586;
	mov.u32 	%r54, 8;
	setp.eq.s32 	%p34, %r30, %r54;
	@%p34 bra 	$Lt_124_3842;
	mov.u32 	%r55, 24;
	setp.eq.s32 	%p35, %r30, %r55;
	@%p35 bra 	$Lt_124_4098;
	mov.u32 	%r56, 15;
	setp.eq.s32 	%p36, %r30, %r56;
	@%p36 bra 	$Lt_124_4354;
	mov.u32 	%r57, 20;
	setp.eq.s32 	%p37, %r30, %r57;
	@%p37 bra 	$Lt_124_4610;
	mov.u32 	%r58, 9;
	setp.eq.s32 	%p38, %r30, %r58;
	@%p38 bra 	$Lt_124_4866;
	mov.u32 	%r59, 5;
	setp.eq.s32 	%p39, %r30, %r59;
	@%p39 bra 	$Lt_124_5122;
	mov.u32 	%r60, 7;
	setp.eq.s32 	%p40, %r30, %r60;
	@%p40 bra 	$Lt_124_5378;
	mov.u32 	%r61, 25;
	setp.eq.s32 	%p41, %r30, %r61;
	@%p41 bra 	$Lt_124_5634;
	mov.u32 	%r62, 26;
	setp.eq.s32 	%p42, %r30, %r62;
	@%p42 bra 	$Lt_124_5890;
	mov.u32 	%r63, 10;
	setp.eq.s32 	%p43, %r30, %r63;
	@%p43 bra 	$Lt_124_6146;
	mov.u32 	%r64, 21;
	setp.eq.s32 	%p44, %r30, %r64;
	@%p44 bra 	$Lt_124_6402;
	mov.u32 	%r65, 0;
	setp.eq.s32 	%p45, %r30, %r65;
	@%p45 bra 	$Lt_124_6658;
	mov.u32 	%r66, 16;
	setp.eq.s32 	%p46, %r30, %r66;
	@%p46 bra 	$Lt_124_6914;
	bra.uni 	$Lt_124_313346;
// Handler for inBlendMode == 3. %f5..%f8 hold the source element and %f9..%f12 the
// destination element (both loaded earlier in the kernel). Per-component blend
// term: B = min(src, dst). Compositing, with a = inAlphaGain * %f8:
//   outA = a + %f12 - a*%f12
//   k    = a / outA
//   out  = dst*(1 - k) + k*(src*(1 - %f12) + %f12*B)
// If outA - 8e-6 <= 0 all four outputs are zero. Result goes back to %f5..%f8.
$Lt_124_258:
	.loc	22	469	0
	ld.param.f32 	%f134, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f134, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f137, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f137;
	mov.f32 	%f139, %f138;
	// Near-zero guard on outA; also protects the reciprocal taken below.
	mov.f32 	%f140, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f141, %f138, %f140;
	mov.f32 	%f142, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p47, %f141, %f142;
	@!%p47 bra 	$Lt_124_276226;
	mov.f32 	%f143, 0f00000000;   	// 0
	mov.f32 	%f144, 0f00000000;   	// 0
	mov.f32 	%f145, 0f00000000;   	// 0
	mov.f32 	%f139, 0f00000000;   	// 0
	bra.uni 	$Lt_124_275970;
$Lt_124_276226:
	// %f147 = 1 - %f12; %f149 = k (approximate reciprocal of outA times a); %f152 = 1 - k.
	mov.f32 	%f146, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f146, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f150, 0f3f800000;   	// 1
	mul.ftz.f32 	%f151, %f135, %f148;
	sub.ftz.f32 	%f152, %f150, %f151;
	// Same three-instruction pattern per component: blend term, inner mix, outer mix.
	min.ftz.f32 	%f153, %f5, %f9;
	mul.ftz.f32 	%f154, %f12, %f153;
	fma.rn.ftz.f32 	%f155, %f5, %f147, %f154;
	mul.ftz.f32 	%f156, %f149, %f155;
	fma.rn.ftz.f32 	%f145, %f9, %f152, %f156;
	min.ftz.f32 	%f157, %f6, %f10;
	mul.ftz.f32 	%f158, %f12, %f157;
	fma.rn.ftz.f32 	%f159, %f6, %f147, %f158;
	mul.ftz.f32 	%f160, %f149, %f159;
	fma.rn.ftz.f32 	%f144, %f10, %f152, %f160;
	min.ftz.f32 	%f161, %f7, %f11;
	mul.ftz.f32 	%f162, %f12, %f161;
	fma.rn.ftz.f32 	%f163, %f7, %f147, %f162;
	mul.ftz.f32 	%f164, %f149, %f163;
	fma.rn.ftz.f32 	%f143, %f11, %f152, %f164;
$Lt_124_275970:
	.loc	6	191	0
	mov.f32 	%f5, %f145;
	mov.f32 	%f6, %f144;
	mov.f32 	%f7, %f143;
	mov.f32 	%f8, %f139;
	bra.uni 	$Lt_124_313346;
// Handler for inBlendMode == 11. %f5..%f8 hold the source element and %f9..%f12
// the destination element (both loaded earlier in the kernel). Per-component
// blend term: B = max(src, dst). Compositing, with a = inAlphaGain * %f8:
//   outA = a + %f12 - a*%f12
//   k    = a / outA
//   out  = dst*(1 - k) + k*(src*(1 - %f12) + %f12*B)
// If outA - 8e-6 <= 0 all four outputs are zero. Result goes back to %f5..%f8.
$Lt_124_770:
	.loc	22	470	0
	ld.param.f32 	%f165, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f165, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f166, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f166;
	mov.f32 	%f167, %f138;
	// Near-zero guard on outA; also protects the reciprocal taken below.
	mov.f32 	%f168, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f169, %f138, %f168;
	mov.f32 	%f170, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p48, %f169, %f170;
	@!%p48 bra 	$Lt_124_276738;
	mov.f32 	%f171, 0f00000000;   	// 0
	mov.f32 	%f172, 0f00000000;   	// 0
	mov.f32 	%f173, 0f00000000;   	// 0
	mov.f32 	%f167, 0f00000000;   	// 0
	bra.uni 	$Lt_124_276482;
$Lt_124_276738:
	// %f147 = 1 - %f12; %f149 = k (approximate reciprocal of outA times a); %f152 = 1 - k.
	mov.f32 	%f174, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f174, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f175, 0f3f800000;   	// 1
	mul.ftz.f32 	%f176, %f135, %f148;
	sub.ftz.f32 	%f152, %f175, %f176;
	// Same three-instruction pattern per component: blend term, inner mix, outer mix.
	max.ftz.f32 	%f177, %f5, %f9;
	mul.ftz.f32 	%f178, %f12, %f177;
	fma.rn.ftz.f32 	%f179, %f5, %f147, %f178;
	mul.ftz.f32 	%f180, %f149, %f179;
	fma.rn.ftz.f32 	%f173, %f9, %f152, %f180;
	max.ftz.f32 	%f181, %f6, %f10;
	mul.ftz.f32 	%f182, %f12, %f181;
	fma.rn.ftz.f32 	%f183, %f6, %f147, %f182;
	mul.ftz.f32 	%f184, %f149, %f183;
	fma.rn.ftz.f32 	%f172, %f10, %f152, %f184;
	max.ftz.f32 	%f185, %f7, %f11;
	mul.ftz.f32 	%f186, %f12, %f185;
	fma.rn.ftz.f32 	%f187, %f7, %f147, %f186;
	mul.ftz.f32 	%f188, %f149, %f187;
	fma.rn.ftz.f32 	%f171, %f11, %f152, %f188;
$Lt_124_276482:
	.loc	6	192	0
	mov.f32 	%f5, %f173;
	mov.f32 	%f6, %f172;
	mov.f32 	%f7, %f171;
	mov.f32 	%f8, %f167;
	bra.uni 	$Lt_124_313346;
// Handler for inBlendMode == 17. %f5..%f8 hold the source element and %f9..%f12
// the destination element (both loaded earlier in the kernel). Per-component
// blend term: B = min(max(src * dst, 0), 1). Compositing, with a = inAlphaGain * %f8:
//   outA = a + %f12 - a*%f12
//   k    = a / outA
//   out  = dst*(1 - k) + k*(src*(1 - %f12) + %f12*B)
// If outA - 8e-6 <= 0 all four outputs are zero. Result goes back to %f5..%f8.
$Lt_124_1026:
	.loc	22	471	0
	ld.param.f32 	%f189, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f189, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f190, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f190;
	mov.f32 	%f191, %f138;
	// Near-zero guard on outA; also protects the reciprocal taken below.
	mov.f32 	%f192, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f193, %f138, %f192;
	mov.f32 	%f194, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p49, %f193, %f194;
	@!%p49 bra 	$Lt_124_277250;
	mov.f32 	%f195, 0f00000000;   	// 0
	mov.f32 	%f196, 0f00000000;   	// 0
	mov.f32 	%f197, 0f00000000;   	// 0
	mov.f32 	%f191, 0f00000000;   	// 0
	bra.uni 	$Lt_124_276994;
$Lt_124_277250:
	// %f147 = 1 - %f12; %f149 = k (approximate reciprocal of outA times a); %f152 = 1 - k.
	mov.f32 	%f198, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f198, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f199, 0f3f800000;   	// 1
	mul.ftz.f32 	%f200, %f135, %f148;
	sub.ftz.f32 	%f152, %f199, %f200;
	// First component: product clamped to [0, 1], then inner and outer mix.
	mul.ftz.f32 	%f201, %f5, %f9;
	mov.f32 	%f202, 0f00000000;   	// 0
	max.ftz.f32 	%f203, %f201, %f202;
	mov.f32 	%f204, 0f3f800000;   	// 1
	min.ftz.f32 	%f205, %f203, %f204;
	mul.ftz.f32 	%f206, %f12, %f205;
	fma.rn.ftz.f32 	%f207, %f5, %f147, %f206;
	mul.ftz.f32 	%f208, %f149, %f207;
	fma.rn.ftz.f32 	%f197, %f9, %f152, %f208;
	// Second component.
	mul.ftz.f32 	%f209, %f6, %f10;
	mov.f32 	%f210, 0f00000000;   	// 0
	max.ftz.f32 	%f211, %f209, %f210;
	mov.f32 	%f212, 0f3f800000;   	// 1
	min.ftz.f32 	%f213, %f211, %f212;
	mul.ftz.f32 	%f214, %f12, %f213;
	fma.rn.ftz.f32 	%f215, %f6, %f147, %f214;
	mul.ftz.f32 	%f216, %f149, %f215;
	fma.rn.ftz.f32 	%f196, %f10, %f152, %f216;
	// Third component.
	mul.ftz.f32 	%f217, %f7, %f11;
	mov.f32 	%f218, 0f00000000;   	// 0
	max.ftz.f32 	%f219, %f217, %f218;
	mov.f32 	%f220, 0f3f800000;   	// 1
	min.ftz.f32 	%f221, %f219, %f220;
	mul.ftz.f32 	%f222, %f12, %f221;
	fma.rn.ftz.f32 	%f223, %f7, %f147, %f222;
	mul.ftz.f32 	%f224, %f149, %f223;
	fma.rn.ftz.f32 	%f195, %f11, %f152, %f224;
$Lt_124_276994:
	.loc	6	193	0
	mov.f32 	%f5, %f197;
	mov.f32 	%f6, %f196;
	mov.f32 	%f7, %f195;
	mov.f32 	%f8, %f191;
	bra.uni 	$Lt_124_313346;
// Handler for inBlendMode == 22. %f5..%f8 hold the source element and %f9..%f12
// the destination element (both loaded earlier in the kernel). Per-component
// blend term, with clampE(x) = min(max(x, 1e-7), 1):
//   B = min(max(1 - (1 - clampE(dst)) * (1 - clampE(src)), 0), 1)
// Compositing, with a = inAlphaGain * %f8:
//   outA = a + %f12 - a*%f12
//   k    = a / outA
//   out  = dst*(1 - k) + k*(src*(1 - %f12) + %f12*B)
// If outA - 8e-6 <= 0 all four outputs are zero. Result goes back to %f5..%f8.
$Lt_124_1282:
	.loc	22	472	0
	ld.param.f32 	%f225, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f225, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f226, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f226;
	mov.f32 	%f227, %f138;
	// Near-zero guard on outA; also protects the reciprocal taken below.
	mov.f32 	%f228, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f229, %f138, %f228;
	mov.f32 	%f230, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f229, %f230;
	@!%p50 bra 	$Lt_124_277762;
	mov.f32 	%f231, 0f00000000;   	// 0
	mov.f32 	%f232, 0f00000000;   	// 0
	mov.f32 	%f233, 0f00000000;   	// 0
	mov.f32 	%f227, 0f00000000;   	// 0
	bra.uni 	$Lt_124_277506;
$Lt_124_277762:
	// %f147 = 1 - %f12; %f149 = k (approximate reciprocal of outA times a); %f152 = 1 - k.
	mov.f32 	%f234, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f234, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f235, 0f3f800000;   	// 1
	mul.ftz.f32 	%f236, %f135, %f148;
	sub.ftz.f32 	%f152, %f235, %f236;
	// First component: %f243 = 1 - clampE(%f9), %f249 = 1 - clampE(%f5),
	// %f255 = B, then the inner and outer mix.
	mov.f32 	%f237, 0f3f800000;   	// 1
	mov.f32 	%f238, 0f3f800000;   	// 1
	mov.f32 	%f239, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f240, %f9, %f239;
	mov.f32 	%f241, 0f3f800000;   	// 1
	min.ftz.f32 	%f242, %f240, %f241;
	sub.ftz.f32 	%f243, %f238, %f242;
	mov.f32 	%f244, 0f3f800000;   	// 1
	mov.f32 	%f245, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f246, %f5, %f245;
	mov.f32 	%f247, 0f3f800000;   	// 1
	min.ftz.f32 	%f248, %f246, %f247;
	sub.ftz.f32 	%f249, %f244, %f248;
	mul.ftz.f32 	%f250, %f243, %f249;
	sub.ftz.f32 	%f251, %f237, %f250;
	mov.f32 	%f252, 0f00000000;   	// 0
	max.ftz.f32 	%f253, %f251, %f252;
	mov.f32 	%f254, 0f3f800000;   	// 1
	min.ftz.f32 	%f255, %f253, %f254;
	mul.ftz.f32 	%f256, %f12, %f255;
	fma.rn.ftz.f32 	%f257, %f5, %f147, %f256;
	mul.ftz.f32 	%f258, %f149, %f257;
	fma.rn.ftz.f32 	%f233, %f9, %f152, %f258;
	// Second component (%f10 / %f6), same sequence.
	mov.f32 	%f259, 0f3f800000;   	// 1
	mov.f32 	%f260, 0f3f800000;   	// 1
	mov.f32 	%f261, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f262, %f10, %f261;
	mov.f32 	%f263, 0f3f800000;   	// 1
	min.ftz.f32 	%f264, %f262, %f263;
	sub.ftz.f32 	%f265, %f260, %f264;
	mov.f32 	%f266, 0f3f800000;   	// 1
	mov.f32 	%f267, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f268, %f6, %f267;
	mov.f32 	%f269, 0f3f800000;   	// 1
	min.ftz.f32 	%f270, %f268, %f269;
	sub.ftz.f32 	%f271, %f266, %f270;
	mul.ftz.f32 	%f272, %f265, %f271;
	sub.ftz.f32 	%f273, %f259, %f272;
	mov.f32 	%f274, 0f00000000;   	// 0
	max.ftz.f32 	%f275, %f273, %f274;
	mov.f32 	%f276, 0f3f800000;   	// 1
	min.ftz.f32 	%f277, %f275, %f276;
	mul.ftz.f32 	%f278, %f12, %f277;
	fma.rn.ftz.f32 	%f279, %f6, %f147, %f278;
	mul.ftz.f32 	%f280, %f149, %f279;
	fma.rn.ftz.f32 	%f232, %f10, %f152, %f280;
	// Third component (%f11 / %f7), same sequence.
	mov.f32 	%f281, 0f3f800000;   	// 1
	mov.f32 	%f282, 0f3f800000;   	// 1
	mov.f32 	%f283, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f284, %f11, %f283;
	mov.f32 	%f285, 0f3f800000;   	// 1
	min.ftz.f32 	%f286, %f284, %f285;
	sub.ftz.f32 	%f287, %f282, %f286;
	mov.f32 	%f288, 0f3f800000;   	// 1
	mov.f32 	%f289, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f290, %f7, %f289;
	mov.f32 	%f291, 0f3f800000;   	// 1
	min.ftz.f32 	%f292, %f290, %f291;
	sub.ftz.f32 	%f293, %f288, %f292;
	mul.ftz.f32 	%f294, %f287, %f293;
	sub.ftz.f32 	%f295, %f281, %f294;
	mov.f32 	%f296, 0f00000000;   	// 0
	max.ftz.f32 	%f297, %f295, %f296;
	mov.f32 	%f298, 0f3f800000;   	// 1
	min.ftz.f32 	%f299, %f297, %f298;
	mul.ftz.f32 	%f300, %f12, %f299;
	fma.rn.ftz.f32 	%f301, %f7, %f147, %f300;
	mul.ftz.f32 	%f302, %f149, %f301;
	fma.rn.ftz.f32 	%f231, %f11, %f152, %f302;
$Lt_124_277506:
	.loc	6	194	0
	mov.f32 	%f5, %f233;
	mov.f32 	%f6, %f232;
	mov.f32 	%f7, %f231;
	mov.f32 	%f8, %f227;
	bra.uni 	$Lt_124_313346;
$Lt_124_1538:
	// Case arm: pick one of the two pixels per thread, pseudo-randomly,
	// weighted by the scaled alpha.
	//   pixel A = (%f5, %f6, %f7, %f8)    pixel B = (%f9, %f10, %f11, %f12)
	// The chosen pixel is written back to %f5..%f8 before jumping to the
	// common join label $Lt_124_313346.
	// NOTE(review): the selection logic looks like a "dissolve" blend with A as
	// the source and B as the backdrop -- confirm against the CUDA source
	// (file 22, lines 526-537).
	.loc	22	526	0
	ld.param.f32 	%f303, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f303, %f8;
	mov.f32 	%f304, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f305, %f135, %f304;
	mov.f32 	%f306, 0f00000000;   	// 0
	// Scaled alpha is (within 8e-6 of) zero or negative -> result is pixel B.
	setp.le.ftz.f32 	%p51, %f305, %f306;
	@!%p51 bra 	$Lt_124_233986;
	.loc	22	528	0
	mov.f32 	%f307, %f9;
	mov.f32 	%f308, %f10;
	mov.f32 	%f309, %f11;
	mov.f32 	%f310, %f12;
	bra.uni 	$LDWendi__Z4Randj_301_56;
$Lt_124_233986:
	.loc	22	530	0
	mov.f32 	%f311, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f312, %f135, %f311;
	mov.f32 	%f313, 0f3f800000;   	// 1
	// Scaled alpha is (within 8e-6 of) one or larger -> result is pixel A.
	// Note the alpha written here is the unscaled %f8, not %f135.
	setp.ge.ftz.f32 	%p52, %f312, %f313;
	@!%p52 bra 	$Lt_124_234242;
	.loc	22	532	0
	mov.f32 	%f307, %f5;
	mov.f32 	%f308, %f6;
	mov.f32 	%f309, %f7;
	mov.f32 	%f310, %f8;
	bra.uni 	$LDWendi__Z4Randj_301_56;
$Lt_124_234242:
	// Intermediate alpha: derive a pseudo-random number from %r5 and %r11 and
	// compare it with the scaled alpha.
	// The block below is an integer hash built from rounds of subtract / shift /
	// xor on three words seeded with (%r5, %r11, 1 - %r5).
	// NOTE(review): %r5 / %r11 are set outside this view (presumably pixel
	// coordinates or a seed); the code appears to be the inlined Rand2D / Mix3
	// helpers declared at the top of the file -- confirm.
	.loc	21	143	0
	mov.s32 	%r67, 1;
	sub.s32 	%r68, %r67, %r5;
	shr.u32 	%r69, %r11, 13;
	sub.u32 	%r70, %r5, %r11;
	sub.u32 	%r71, %r68, %r11;
	xor.b32 	%r72, %r69, %r71;
	shl.b32 	%r73, %r72, 8;
	sub.u32 	%r74, %r70, %r72;
	sub.u32 	%r75, %r11, %r72;
	xor.b32 	%r76, %r73, %r74;
	shr.u32 	%r77, %r76, 13;
	sub.u32 	%r78, %r75, %r76;
	sub.u32 	%r79, %r72, %r76;
	xor.b32 	%r80, %r77, %r78;
	shr.u32 	%r81, %r80, 12;
	sub.u32 	%r82, %r79, %r80;
	xor.b32 	%r83, %r81, %r82;
	sub.u32 	%r84, %r76, %r80;
	sub.u32 	%r85, %r84, %r83;
	shl.b32 	%r86, %r83, 16;
	xor.b32 	%r87, %r85, %r86;
	.loc	21	144	0
	sub.u32 	%r88, %r80, %r83;
	sub.u32 	%r89, %r88, %r87;
	shr.u32 	%r90, %r87, 5;
	xor.b32 	%r91, %r89, %r90;
	.loc	21	145	0
	sub.u32 	%r92, %r83, %r87;
	sub.u32 	%r93, %r92, %r91;
	shr.u32 	%r94, %r91, 3;
	xor.b32 	%r95, %r93, %r94;
	.loc	21	146	0
	sub.u32 	%r96, %r87, %r91;
	sub.u32 	%r97, %r96, %r95;
	shl.b32 	%r98, %r95, 10;
	xor.b32 	%r99, %r97, %r98;
	.loc	21	147	0
	sub.u32 	%r100, %r91, %r95;
	sub.u32 	%r101, %r100, %r99;
	shr.u32 	%r102, %r99, 15;
	xor.b32 	%r103, %r101, %r102;
	.loc	22	537	0
	// %r104 = trunc(scaled alpha * 32767): the threshold, as an integer.
	mov.f32 	%f314, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f315, %f135, %f314;
	cvt.rzi.ftz.s32.f32 	%r104, %f315;
	// First LCG step on the hash (x*1103515245 + 12345); bits 16..23 of the
	// result become bits 7..14 of the random value.
	mul.lo.u32 	%r105, %r103, 1103515245;
	add.u32 	%r106, %r105, 12345;
	shr.u32 	%r107, %r106, 16;
	and.b32 	%r108, %r107, 255;
	shl.b32 	%r109, %r108, 7;
	// Second value computed directly from the hash with folded constants
	// (modulo 2^32 these equal two consecutive steps of the LCG above); bits
	// 16..23 of it are xor-ed into the low byte of the random value.
	mul.lo.u32 	%r110, %r103, -1029531031;
	sub.u32 	%r111, %r110, 740551042;
	shr.u32 	%r112, %r111, 16;
	and.b32 	%r113, %r112, 255;
	xor.b32 	%r114, %r109, %r113;
	// threshold < random -> take pixel B, otherwise take pixel A.
	setp.lt.s32 	%p53, %r104, %r114;
	@%p53 bra 	$Lt_124_278274;
	mov.f32 	%f316, %f5;
	mov.f32 	%f317, %f6;
	mov.f32 	%f318, %f7;
	mov.f32 	%f319, %f8;
	bra.uni 	$Lt_124_278018;
$Lt_124_278274:
	mov.f32 	%f316, %f9;
	mov.f32 	%f317, %f10;
	mov.f32 	%f318, %f11;
	mov.f32 	%f319, %f12;
$Lt_124_278018:
	mov.f32 	%f307, %f316;
	mov.f32 	%f308, %f317;
	mov.f32 	%f309, %f318;
	mov.f32 	%f310, %f319;
$LDWendi__Z4Randj_301_56:
	// Join of the three paths above: commit the selected pixel to %f5..%f8.
	.loc	6	195	0
	mov.f32 	%f5, %f307;
	mov.f32 	%f6, %f308;
	mov.f32 	%f7, %f309;
	mov.f32 	%f8, %f310;
	bra.uni 	$Lt_124_313346;
$Lt_124_1794:
	// Case arm: composite pixel A = (%f5, %f6, %f7, %f8) with pixel
	// B = (%f9, %f10, %f11, %f12) using, per channel, the blend term
	//   blend = clamp(1 - (1 - clamp(b, 0, 1)) / clamp(a, 1e-7, 1), 0, 1)
	// and write the result back to %f5..%f8.
	// NOTE(review): this matches the usual "color burn" blend with A as source
	// and B as backdrop -- confirm against the CUDA source (file 22, line 473).
	.loc	22	473	0
	ld.param.f32 	%f320, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f320, %f8;
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f321, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f321;
	mov.f32 	%f322, %f138;
	mov.f32 	%f323, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f324, %f138, %f323;
	mov.f32 	%f325, 0f00000000;   	// 0
	// Combined alpha is (within 8e-6 of) zero: emit an all-zero pixel and skip
	// the reciprocal below.
	setp.le.ftz.f32 	%p54, %f324, %f325;
	@!%p54 bra 	$Lt_124_278786;
	mov.f32 	%f326, 0f00000000;   	// 0
	mov.f32 	%f327, 0f00000000;   	// 0
	mov.f32 	%f328, 0f00000000;   	// 0
	mov.f32 	%f322, 0f00000000;   	// 0
	bra.uni 	$Lt_124_278530;
$Lt_124_278786:
	// Shared factors for all three channels:
	//   %f147 = 1 - aB
	//   %f149 = aA / aR          (via approximate reciprocal %f148 = 1/aR)
	//   %f152 = 1 - aA / aR
	// Each channel then evaluates
	//   out = b * %f152 + %f149 * (a * %f147 + aB * blend)
	mov.f32 	%f329, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f329, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f330, 0f3f800000;   	// 1
	mul.ftz.f32 	%f331, %f135, %f148;
	sub.ftz.f32 	%f152, %f330, %f331;
	// Channel 0: a = %f5, b = %f9 -> %f328
	mov.f32 	%f332, 0f3f800000;   	// 1
	mov.f32 	%f333, 0f3f800000;   	// 1
	mov.f32 	%f334, 0f00000000;   	// 0
	max.ftz.f32 	%f335, %f9, %f334;
	mov.f32 	%f336, 0f3f800000;   	// 1
	min.ftz.f32 	%f337, %f335, %f336;
	sub.ftz.f32 	%f338, %f333, %f337;
	// The divisor is clamped from below at 1e-7 so the division cannot be by zero.
	mov.f32 	%f339, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f340, %f5, %f339;
	mov.f32 	%f341, 0f3f800000;   	// 1
	min.ftz.f32 	%f342, %f340, %f341;
	div.approx.ftz.f32 	%f343, %f338, %f342;
	sub.ftz.f32 	%f344, %f332, %f343;
	mov.f32 	%f345, 0f00000000;   	// 0
	max.ftz.f32 	%f346, %f344, %f345;
	mov.f32 	%f347, 0f3f800000;   	// 1
	min.ftz.f32 	%f348, %f346, %f347;
	mul.ftz.f32 	%f349, %f12, %f348;
	fma.rn.ftz.f32 	%f350, %f5, %f147, %f349;
	mul.ftz.f32 	%f351, %f149, %f350;
	fma.rn.ftz.f32 	%f328, %f9, %f152, %f351;
	// Channel 1: a = %f6, b = %f10 -> %f327
	mov.f32 	%f352, 0f3f800000;   	// 1
	mov.f32 	%f353, 0f3f800000;   	// 1
	mov.f32 	%f354, 0f00000000;   	// 0
	max.ftz.f32 	%f355, %f10, %f354;
	mov.f32 	%f356, 0f3f800000;   	// 1
	min.ftz.f32 	%f357, %f355, %f356;
	sub.ftz.f32 	%f358, %f353, %f357;
	mov.f32 	%f359, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f360, %f6, %f359;
	mov.f32 	%f361, 0f3f800000;   	// 1
	min.ftz.f32 	%f362, %f360, %f361;
	div.approx.ftz.f32 	%f363, %f358, %f362;
	sub.ftz.f32 	%f364, %f352, %f363;
	mov.f32 	%f365, 0f00000000;   	// 0
	max.ftz.f32 	%f366, %f364, %f365;
	mov.f32 	%f367, 0f3f800000;   	// 1
	min.ftz.f32 	%f368, %f366, %f367;
	mul.ftz.f32 	%f369, %f12, %f368;
	fma.rn.ftz.f32 	%f370, %f6, %f147, %f369;
	mul.ftz.f32 	%f371, %f149, %f370;
	fma.rn.ftz.f32 	%f327, %f10, %f152, %f371;
	// Channel 2: a = %f7, b = %f11 -> %f326
	mov.f32 	%f372, 0f3f800000;   	// 1
	mov.f32 	%f373, 0f3f800000;   	// 1
	mov.f32 	%f374, 0f00000000;   	// 0
	max.ftz.f32 	%f375, %f11, %f374;
	mov.f32 	%f376, 0f3f800000;   	// 1
	min.ftz.f32 	%f377, %f375, %f376;
	sub.ftz.f32 	%f378, %f373, %f377;
	mov.f32 	%f379, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f380, %f7, %f379;
	mov.f32 	%f381, 0f3f800000;   	// 1
	min.ftz.f32 	%f382, %f380, %f381;
	div.approx.ftz.f32 	%f383, %f378, %f382;
	sub.ftz.f32 	%f384, %f372, %f383;
	mov.f32 	%f385, 0f00000000;   	// 0
	max.ftz.f32 	%f386, %f384, %f385;
	mov.f32 	%f387, 0f3f800000;   	// 1
	min.ftz.f32 	%f388, %f386, %f387;
	mul.ftz.f32 	%f389, %f12, %f388;
	fma.rn.ftz.f32 	%f390, %f7, %f147, %f389;
	mul.ftz.f32 	%f391, %f149, %f390;
	fma.rn.ftz.f32 	%f326, %f11, %f152, %f391;
$Lt_124_278530:
	// Commit the result (three channels + combined alpha) to %f5..%f8.
	.loc	6	196	0
	mov.f32 	%f5, %f328;
	mov.f32 	%f6, %f327;
	mov.f32 	%f7, %f326;
	mov.f32 	%f8, %f322;
	bra.uni 	$Lt_124_313346;
$Lt_124_2050:
	// Case arm: composite pixel A = (%f5, %f6, %f7, %f8) with pixel
	// B = (%f9, %f10, %f11, %f12) using, per channel, the blend term
	//   blend = clamp(clamp(a, 0, 1) + clamp(b, 0, 1) - 1, 0, 1)
	// and write the result back to %f5..%f8.
	// NOTE(review): this matches the usual "linear burn" blend with A as source
	// and B as backdrop -- confirm against the CUDA source (file 22, line 474).
	.loc	22	474	0
	ld.param.f32 	%f392, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f392, %f8;
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f393, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f393;
	mov.f32 	%f394, %f138;
	mov.f32 	%f395, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f396, %f138, %f395;
	mov.f32 	%f397, 0f00000000;   	// 0
	// Combined alpha is (within 8e-6 of) zero: emit an all-zero pixel and skip
	// the reciprocal below.
	setp.le.ftz.f32 	%p55, %f396, %f397;
	@!%p55 bra 	$Lt_124_279298;
	mov.f32 	%f398, 0f00000000;   	// 0
	mov.f32 	%f399, 0f00000000;   	// 0
	mov.f32 	%f400, 0f00000000;   	// 0
	mov.f32 	%f394, 0f00000000;   	// 0
	bra.uni 	$Lt_124_279042;
$Lt_124_279298:
	// Shared factors for all three channels:
	//   %f147 = 1 - aB
	//   %f149 = aA / aR          (via approximate reciprocal %f148 = 1/aR)
	//   %f152 = 1 - aA / aR
	// Each channel then evaluates
	//   out = b * %f152 + %f149 * (a * %f147 + aB * blend)
	mov.f32 	%f401, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f401, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f402, 0f3f800000;   	// 1
	mul.ftz.f32 	%f403, %f135, %f148;
	sub.ftz.f32 	%f152, %f402, %f403;
	// Channel 0: a = %f5, b = %f9 -> %f400
	mov.f32 	%f404, 0f00000000;   	// 0
	max.ftz.f32 	%f405, %f5, %f404;
	mov.f32 	%f406, 0f3f800000;   	// 1
	min.ftz.f32 	%f407, %f405, %f406;
	mov.f32 	%f408, 0f00000000;   	// 0
	max.ftz.f32 	%f409, %f9, %f408;
	mov.f32 	%f410, 0f3f800000;   	// 1
	min.ftz.f32 	%f411, %f409, %f410;
	add.ftz.f32 	%f412, %f407, %f411;
	mov.f32 	%f413, 0fbf800000;   	// -1
	add.ftz.f32 	%f414, %f412, %f413;
	mov.f32 	%f415, 0f00000000;   	// 0
	max.ftz.f32 	%f416, %f414, %f415;
	mov.f32 	%f417, 0f3f800000;   	// 1
	min.ftz.f32 	%f418, %f416, %f417;
	mul.ftz.f32 	%f419, %f12, %f418;
	fma.rn.ftz.f32 	%f420, %f5, %f147, %f419;
	mul.ftz.f32 	%f421, %f149, %f420;
	fma.rn.ftz.f32 	%f400, %f9, %f152, %f421;
	// Channel 1: a = %f6, b = %f10 -> %f399
	mov.f32 	%f422, 0f00000000;   	// 0
	max.ftz.f32 	%f423, %f6, %f422;
	mov.f32 	%f424, 0f3f800000;   	// 1
	min.ftz.f32 	%f425, %f423, %f424;
	mov.f32 	%f426, 0f00000000;   	// 0
	max.ftz.f32 	%f427, %f10, %f426;
	mov.f32 	%f428, 0f3f800000;   	// 1
	min.ftz.f32 	%f429, %f427, %f428;
	add.ftz.f32 	%f430, %f425, %f429;
	mov.f32 	%f431, 0fbf800000;   	// -1
	add.ftz.f32 	%f432, %f430, %f431;
	mov.f32 	%f433, 0f00000000;   	// 0
	max.ftz.f32 	%f434, %f432, %f433;
	mov.f32 	%f435, 0f3f800000;   	// 1
	min.ftz.f32 	%f436, %f434, %f435;
	mul.ftz.f32 	%f437, %f12, %f436;
	fma.rn.ftz.f32 	%f438, %f6, %f147, %f437;
	mul.ftz.f32 	%f439, %f149, %f438;
	fma.rn.ftz.f32 	%f399, %f10, %f152, %f439;
	// Channel 2: a = %f7, b = %f11 -> %f398
	mov.f32 	%f440, 0f00000000;   	// 0
	max.ftz.f32 	%f441, %f7, %f440;
	mov.f32 	%f442, 0f3f800000;   	// 1
	min.ftz.f32 	%f443, %f441, %f442;
	mov.f32 	%f444, 0f00000000;   	// 0
	max.ftz.f32 	%f445, %f11, %f444;
	mov.f32 	%f446, 0f3f800000;   	// 1
	min.ftz.f32 	%f447, %f445, %f446;
	add.ftz.f32 	%f448, %f443, %f447;
	mov.f32 	%f449, 0fbf800000;   	// -1
	add.ftz.f32 	%f450, %f448, %f449;
	mov.f32 	%f451, 0f00000000;   	// 0
	max.ftz.f32 	%f452, %f450, %f451;
	mov.f32 	%f453, 0f3f800000;   	// 1
	min.ftz.f32 	%f454, %f452, %f453;
	mul.ftz.f32 	%f455, %f12, %f454;
	fma.rn.ftz.f32 	%f456, %f7, %f147, %f455;
	mul.ftz.f32 	%f457, %f149, %f456;
	fma.rn.ftz.f32 	%f398, %f11, %f152, %f457;
$Lt_124_279042:
	// Commit the result (three channels + combined alpha) to %f5..%f8.
	.loc	6	197	0
	mov.f32 	%f5, %f400;
	mov.f32 	%f6, %f399;
	mov.f32 	%f7, %f398;
	mov.f32 	%f8, %f394;
	bra.uni 	$Lt_124_313346;
$Lt_124_2306:
	// Case arm: whole-pixel selection by luma. With pixel
	// A = (%f5, %f6, %f7, %f8) and pixel B = (%f9, %f10, %f11, %f12):
	//   - scaled A alpha ~ 0           -> result is B unchanged
	//   - B alpha ~ 0                  -> result is A with the scaled alpha
	//   - luma(A) <  luma(B)           -> A composited over B (plain alpha blend)
	//   - otherwise                    -> result is B unchanged
	// The result is written back to %f5..%f8.
	// NOTE(review): this matches a "darker color" blend with A as source and B
	// as backdrop -- confirm against the CUDA source (file 22, lines 555-608).
	.loc	6	198	0
	ld.param.f32 	%f458, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f458, %f8;
	mov.f32 	%f459, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f460, %f135, %f459;
	mov.f32 	%f461, 0f00000000;   	// 0
	// aA - 8e-6 <= 0: A contributes nothing, keep B.
	setp.le.ftz.f32 	%p56, %f460, %f461;
	@!%p56 bra 	$Lt_124_279810;
	.loc	22	608	0
	mov.f32 	%f462, %f9;
	mov.f32 	%f463, %f10;
	mov.f32 	%f464, %f11;
	mov.f32 	%f465, %f12;
	bra.uni 	$Lt_124_280578;
$Lt_124_279810:
	mov.f32 	%f466, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f467, %f12, %f466;
	mov.f32 	%f468, 0f00000000;   	// 0
	// aB - 8e-6 <= 0: B contributes nothing, take A's channels with alpha aA.
	setp.le.ftz.f32 	%p57, %f467, %f468;
	@!%p57 bra 	$Lt_124_280322;
	mov.f32 	%f462, %f5;
	mov.f32 	%f463, %f6;
	mov.f32 	%f464, %f7;
	mov.f32 	%f465, %f135;
	bra.uni 	$Lt_124_280578;
$Lt_124_280322:
	// Choose the luma coefficient table: %r16 <= 720 uses the 601 table,
	// %r16 > 720 uses the 709 table.
	// NOTE(review): %r16 is set outside this view; presumably an image
	// dimension used as an SD/HD switch -- confirm.
	mov.u32 	%r115, 720;
	setp.gt.s32 	%p58, %r16, %r115;
	@%p58 bra 	$Lt_124_280834;
	.loc	22	555	0
	// luma(p) = saturate(k[0]*p.ch2 + k[1]*p.ch1 + k[2]*p.ch0), where k[] are
	// the first three floats of kRGB32f_To_601YPbPr and ch0..ch2 are the
	// pixel's first three registers (%f5/%f6/%f7 for A, %f9/%f10/%f11 for B).
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f472, %f6, %f471;
	fma.rn.ftz.f32 	%f473, %f470, %f7, %f472;
	fma.rn.ftz.f32 	%f474, %f469, %f5, %f473;
	cvt.ftz.sat.f32.f32 	%f475, %f474;
	mul.ftz.f32 	%f476, %f10, %f471;
	fma.rn.ftz.f32 	%f477, %f470, %f11, %f476;
	fma.rn.ftz.f32 	%f478, %f469, %f9, %f477;
	cvt.ftz.sat.f32.f32 	%f479, %f478;
	// luma(A) < luma(B): composite A over B; otherwise keep B.
	setp.lt.ftz.f32 	%p59, %f475, %f479;
	@!%p59 bra 	$Lt_124_236034;
	.loc	22	468	0
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f480, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f480;
	mov.f32 	%f481, %f138;
	mov.f32 	%f482, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f483, %f138, %f482;
	mov.f32 	%f484, 0f00000000;   	// 0
	// Combined alpha ~ 0: emit an all-zero pixel and skip the reciprocal.
	setp.le.ftz.f32 	%p60, %f483, %f484;
	@!%p60 bra 	$Lt_124_281346;
	mov.f32 	%f485, 0f00000000;   	// 0
	mov.f32 	%f486, 0f00000000;   	// 0
	mov.f32 	%f487, 0f00000000;   	// 0
	mov.f32 	%f481, 0f00000000;   	// 0
	bra.uni 	$Lt_124_281090;
$Lt_124_281346:
	// %f147 = 1 - aB, %f149 = aA / aR, %f152 = 1 - aA / aR.
	// Per channel: out = b * %f152 + %f149 * (a * %f147 + a * aB); the inner
	// sum is a*(1 - aB) + a*aB, i.e. the blend term here is simply a.
	mov.f32 	%f488, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f488, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f489, 0f3f800000;   	// 1
	mul.ftz.f32 	%f490, %f135, %f148;
	sub.ftz.f32 	%f152, %f489, %f490;
	mul.ftz.f32 	%f491, %f147, %f5;
	fma.rn.ftz.f32 	%f492, %f5, %f12, %f491;
	mul.ftz.f32 	%f493, %f149, %f492;
	fma.rn.ftz.f32 	%f487, %f9, %f152, %f493;
	mul.ftz.f32 	%f494, %f147, %f6;
	fma.rn.ftz.f32 	%f495, %f6, %f12, %f494;
	mul.ftz.f32 	%f496, %f149, %f495;
	fma.rn.ftz.f32 	%f486, %f10, %f152, %f496;
	mul.ftz.f32 	%f497, %f147, %f7;
	fma.rn.ftz.f32 	%f498, %f7, %f12, %f497;
	mul.ftz.f32 	%f499, %f149, %f498;
	fma.rn.ftz.f32 	%f485, %f11, %f152, %f499;
$Lt_124_281090:
	.loc	22	557	0
	mov.f32 	%f500, %f487;
	mov.f32 	%f501, %f486;
	mov.f32 	%f502, %f485;
	mov.f32 	%f503, %f481;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_301_54;
$Lt_124_236034:
	// luma(A) >= luma(B) under the 601 table: keep B.
	.loc	22	561	0
	mov.f32 	%f500, %f9;
	mov.f32 	%f501, %f10;
	mov.f32 	%f502, %f11;
	mov.f32 	%f503, %f12;
$LDWendi__Z10GetLuma6018PixelRGB_301_54:
	.loc	22	608	0
	mov.f32 	%f462, %f500;
	mov.f32 	%f463, %f501;
	mov.f32 	%f464, %f502;
	mov.f32 	%f465, %f503;
	bra.uni 	$Lt_124_280578;
$Lt_124_280834:
	// Same comparison and compositing as the 601 path above, but with the
	// coefficients read from kRGB32f_To_709YPbPr.
	.loc	22	569	0
	ld.const.f32 	%f504, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f505, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f506, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f507, %f6, %f506;
	fma.rn.ftz.f32 	%f508, %f505, %f7, %f507;
	fma.rn.ftz.f32 	%f509, %f504, %f5, %f508;
	cvt.ftz.sat.f32.f32 	%f510, %f509;
	mul.ftz.f32 	%f511, %f10, %f506;
	fma.rn.ftz.f32 	%f512, %f505, %f11, %f511;
	fma.rn.ftz.f32 	%f513, %f504, %f9, %f512;
	cvt.ftz.sat.f32.f32 	%f514, %f513;
	// luma(A) < luma(B): composite A over B; otherwise keep B.
	setp.lt.ftz.f32 	%p61, %f510, %f514;
	@!%p61 bra 	$Lt_124_236546;
	.loc	22	468	0
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f515, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f515;
	mov.f32 	%f516, %f138;
	mov.f32 	%f517, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f518, %f138, %f517;
	mov.f32 	%f519, 0f00000000;   	// 0
	// Combined alpha ~ 0: emit an all-zero pixel and skip the reciprocal.
	setp.le.ftz.f32 	%p62, %f518, %f519;
	@!%p62 bra 	$Lt_124_281858;
	mov.f32 	%f520, 0f00000000;   	// 0
	mov.f32 	%f521, 0f00000000;   	// 0
	mov.f32 	%f522, 0f00000000;   	// 0
	mov.f32 	%f516, 0f00000000;   	// 0
	bra.uni 	$Lt_124_281602;
$Lt_124_281858:
	mov.f32 	%f523, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f523, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f524, 0f3f800000;   	// 1
	mul.ftz.f32 	%f525, %f135, %f148;
	sub.ftz.f32 	%f152, %f524, %f525;
	mul.ftz.f32 	%f526, %f147, %f5;
	fma.rn.ftz.f32 	%f527, %f5, %f12, %f526;
	mul.ftz.f32 	%f528, %f149, %f527;
	fma.rn.ftz.f32 	%f522, %f9, %f152, %f528;
	mul.ftz.f32 	%f529, %f147, %f6;
	fma.rn.ftz.f32 	%f530, %f6, %f12, %f529;
	mul.ftz.f32 	%f531, %f149, %f530;
	fma.rn.ftz.f32 	%f521, %f10, %f152, %f531;
	mul.ftz.f32 	%f532, %f147, %f7;
	fma.rn.ftz.f32 	%f533, %f7, %f12, %f532;
	mul.ftz.f32 	%f534, %f149, %f533;
	fma.rn.ftz.f32 	%f520, %f11, %f152, %f534;
$Lt_124_281602:
	.loc	22	571	0
	mov.f32 	%f535, %f522;
	mov.f32 	%f536, %f521;
	mov.f32 	%f537, %f520;
	mov.f32 	%f538, %f516;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_301_52;
$Lt_124_236546:
	// luma(A) >= luma(B) under the 709 table: keep B.
	.loc	22	575	0
	mov.f32 	%f535, %f9;
	mov.f32 	%f536, %f10;
	mov.f32 	%f537, %f11;
	mov.f32 	%f538, %f12;
$LDWendi__Z10GetLuma7098PixelRGB_301_52:
	.loc	22	608	0
	mov.f32 	%f462, %f535;
	mov.f32 	%f463, %f536;
	mov.f32 	%f464, %f537;
	mov.f32 	%f465, %f538;
$Lt_124_280578:
$Lt_124_280066:
$Lt_124_279554:
	// Join of every path above: commit the selected/composited pixel.
	.loc	6	198	0
	mov.f32 	%f5, %f462;
	mov.f32 	%f6, %f463;
	mov.f32 	%f7, %f464;
	mov.f32 	%f8, %f465;
	bra.uni 	$Lt_124_313346;
$Lt_124_2562:
	// Case arm: composite pixel A = (%f5, %f6, %f7, %f8) with pixel
	// B = (%f9, %f10, %f11, %f12) using, per channel, the blend term
	//   blend = clamp(clamp(b, 0, 1) / (1 - clamp(a, 0, 0.99999)), 0, 1)
	// and write the result back to %f5..%f8.
	// NOTE(review): this matches the usual "color dodge" blend with A as source
	// and B as backdrop -- confirm against the CUDA source (file 22, line 475).
	.loc	22	475	0
	ld.param.f32 	%f539, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f539, %f8;
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f540, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f540;
	mov.f32 	%f541, %f138;
	mov.f32 	%f542, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f543, %f138, %f542;
	mov.f32 	%f544, 0f00000000;   	// 0
	// Combined alpha is (within 8e-6 of) zero: emit an all-zero pixel and skip
	// the reciprocal below.
	setp.le.ftz.f32 	%p63, %f543, %f544;
	@!%p63 bra 	$Lt_124_282370;
	mov.f32 	%f545, 0f00000000;   	// 0
	mov.f32 	%f546, 0f00000000;   	// 0
	mov.f32 	%f547, 0f00000000;   	// 0
	mov.f32 	%f541, 0f00000000;   	// 0
	bra.uni 	$Lt_124_282114;
$Lt_124_282370:
	// Shared factors for all three channels:
	//   %f147 = 1 - aB
	//   %f149 = aA / aR          (via approximate reciprocal %f148 = 1/aR)
	//   %f152 = 1 - aA / aR
	// Each channel then evaluates
	//   out = b * %f152 + %f149 * (a * %f147 + aB * blend)
	mov.f32 	%f548, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f548, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f549, 0f3f800000;   	// 1
	mul.ftz.f32 	%f550, %f135, %f148;
	sub.ftz.f32 	%f152, %f549, %f550;
	// Channel 0: a = %f5, b = %f9 -> %f547
	mov.f32 	%f551, 0f00000000;   	// 0
	max.ftz.f32 	%f552, %f9, %f551;
	mov.f32 	%f553, 0f3f800000;   	// 1
	min.ftz.f32 	%f554, %f552, %f553;
	mov.f32 	%f555, 0f3f800000;   	// 1
	mov.f32 	%f556, 0f00000000;   	// 0
	max.ftz.f32 	%f557, %f5, %f556;
	// a is capped at 0.99999 so the divisor (1 - a) stays strictly positive.
	mov.f32 	%f558, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f559, %f557, %f558;
	sub.ftz.f32 	%f560, %f555, %f559;
	div.approx.ftz.f32 	%f561, %f554, %f560;
	mov.f32 	%f562, 0f00000000;   	// 0
	max.ftz.f32 	%f563, %f561, %f562;
	mov.f32 	%f564, 0f3f800000;   	// 1
	min.ftz.f32 	%f565, %f563, %f564;
	mul.ftz.f32 	%f566, %f12, %f565;
	fma.rn.ftz.f32 	%f567, %f5, %f147, %f566;
	mul.ftz.f32 	%f568, %f149, %f567;
	fma.rn.ftz.f32 	%f547, %f9, %f152, %f568;
	// Channel 1: a = %f6, b = %f10 -> %f546
	mov.f32 	%f569, 0f00000000;   	// 0
	max.ftz.f32 	%f570, %f10, %f569;
	mov.f32 	%f571, 0f3f800000;   	// 1
	min.ftz.f32 	%f572, %f570, %f571;
	mov.f32 	%f573, 0f3f800000;   	// 1
	mov.f32 	%f574, 0f00000000;   	// 0
	max.ftz.f32 	%f575, %f6, %f574;
	mov.f32 	%f576, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f577, %f575, %f576;
	sub.ftz.f32 	%f578, %f573, %f577;
	div.approx.ftz.f32 	%f579, %f572, %f578;
	mov.f32 	%f580, 0f00000000;   	// 0
	max.ftz.f32 	%f581, %f579, %f580;
	mov.f32 	%f582, 0f3f800000;   	// 1
	min.ftz.f32 	%f583, %f581, %f582;
	mul.ftz.f32 	%f584, %f12, %f583;
	fma.rn.ftz.f32 	%f585, %f6, %f147, %f584;
	mul.ftz.f32 	%f586, %f149, %f585;
	fma.rn.ftz.f32 	%f546, %f10, %f152, %f586;
	// Channel 2: a = %f7, b = %f11 -> %f545
	mov.f32 	%f587, 0f00000000;   	// 0
	max.ftz.f32 	%f588, %f11, %f587;
	mov.f32 	%f589, 0f3f800000;   	// 1
	min.ftz.f32 	%f590, %f588, %f589;
	mov.f32 	%f591, 0f3f800000;   	// 1
	mov.f32 	%f592, 0f00000000;   	// 0
	max.ftz.f32 	%f593, %f7, %f592;
	mov.f32 	%f594, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f595, %f593, %f594;
	sub.ftz.f32 	%f596, %f591, %f595;
	div.approx.ftz.f32 	%f597, %f590, %f596;
	mov.f32 	%f598, 0f00000000;   	// 0
	max.ftz.f32 	%f599, %f597, %f598;
	mov.f32 	%f600, 0f3f800000;   	// 1
	min.ftz.f32 	%f601, %f599, %f600;
	mul.ftz.f32 	%f602, %f12, %f601;
	fma.rn.ftz.f32 	%f603, %f7, %f147, %f602;
	mul.ftz.f32 	%f604, %f149, %f603;
	fma.rn.ftz.f32 	%f545, %f11, %f152, %f604;
$Lt_124_282114:
	// Commit the result (three channels + combined alpha) to %f5..%f8.
	.loc	6	199	0
	mov.f32 	%f5, %f547;
	mov.f32 	%f6, %f546;
	mov.f32 	%f7, %f545;
	mov.f32 	%f8, %f541;
	bra.uni 	$Lt_124_313346;
$Lt_124_2818:
	// Case arm: composite pixel A = (%f5, %f6, %f7, %f8) with pixel
	// B = (%f9, %f10, %f11, %f12) using, per channel, the blend term
	//   blend = clamp(a + b, 0, 1)
	// and write the result back to %f5..%f8.
	// NOTE(review): this matches the usual "linear dodge (add)" blend with A as
	// source and B as backdrop -- confirm against the CUDA source (file 22,
	// line 476).
	.loc	22	476	0
	ld.param.f32 	%f605, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f605, %f8;
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f606, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f606;
	mov.f32 	%f607, %f138;
	mov.f32 	%f608, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f609, %f138, %f608;
	mov.f32 	%f610, 0f00000000;   	// 0
	// Combined alpha is (within 8e-6 of) zero: emit an all-zero pixel and skip
	// the reciprocal below.
	setp.le.ftz.f32 	%p64, %f609, %f610;
	@!%p64 bra 	$Lt_124_282882;
	mov.f32 	%f611, 0f00000000;   	// 0
	mov.f32 	%f612, 0f00000000;   	// 0
	mov.f32 	%f613, 0f00000000;   	// 0
	mov.f32 	%f607, 0f00000000;   	// 0
	bra.uni 	$Lt_124_282626;
$Lt_124_282882:
	// Shared factors for all three channels:
	//   %f147 = 1 - aB
	//   %f149 = aA / aR          (via approximate reciprocal %f148 = 1/aR)
	//   %f152 = 1 - aA / aR
	// Each channel then evaluates
	//   out = b * %f152 + %f149 * (a * %f147 + aB * blend)
	mov.f32 	%f614, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f614, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f615, 0f3f800000;   	// 1
	mul.ftz.f32 	%f616, %f135, %f148;
	sub.ftz.f32 	%f152, %f615, %f616;
	// Channel 0: a = %f5, b = %f9 -> %f613
	add.ftz.f32 	%f617, %f5, %f9;
	mov.f32 	%f618, 0f00000000;   	// 0
	max.ftz.f32 	%f619, %f617, %f618;
	mov.f32 	%f620, 0f3f800000;   	// 1
	min.ftz.f32 	%f621, %f619, %f620;
	mul.ftz.f32 	%f622, %f12, %f621;
	fma.rn.ftz.f32 	%f623, %f5, %f147, %f622;
	mul.ftz.f32 	%f624, %f149, %f623;
	fma.rn.ftz.f32 	%f613, %f9, %f152, %f624;
	// Channel 1: a = %f6, b = %f10 -> %f612
	add.ftz.f32 	%f625, %f6, %f10;
	mov.f32 	%f626, 0f00000000;   	// 0
	max.ftz.f32 	%f627, %f625, %f626;
	mov.f32 	%f628, 0f3f800000;   	// 1
	min.ftz.f32 	%f629, %f627, %f628;
	mul.ftz.f32 	%f630, %f12, %f629;
	fma.rn.ftz.f32 	%f631, %f6, %f147, %f630;
	mul.ftz.f32 	%f632, %f149, %f631;
	fma.rn.ftz.f32 	%f612, %f10, %f152, %f632;
	// Channel 2: a = %f7, b = %f11 -> %f611
	add.ftz.f32 	%f633, %f7, %f11;
	mov.f32 	%f634, 0f00000000;   	// 0
	max.ftz.f32 	%f635, %f633, %f634;
	mov.f32 	%f636, 0f3f800000;   	// 1
	min.ftz.f32 	%f637, %f635, %f636;
	mul.ftz.f32 	%f638, %f12, %f637;
	fma.rn.ftz.f32 	%f639, %f7, %f147, %f638;
	mul.ftz.f32 	%f640, %f149, %f639;
	fma.rn.ftz.f32 	%f611, %f11, %f152, %f640;
$Lt_124_282626:
	// Commit the result (three channels + combined alpha) to %f5..%f8.
	.loc	6	200	0
	mov.f32 	%f5, %f613;
	mov.f32 	%f6, %f612;
	mov.f32 	%f7, %f611;
	mov.f32 	%f8, %f607;
	bra.uni 	$Lt_124_313346;
$Lt_124_3074:
	// Case arm: whole-pixel selection by luma. With pixel
	// A = (%f5, %f6, %f7, %f8) and pixel B = (%f9, %f10, %f11, %f12):
	//   - scaled A alpha ~ 0           -> result is B unchanged
	//   - B alpha ~ 0                  -> result is A with the scaled alpha
	//   - luma(A) >  luma(B)           -> A composited over B (plain alpha blend)
	//   - otherwise                    -> result is B unchanged
	// The result is written back to %f5..%f8.
	// NOTE(review): this matches a "lighter color" blend with A as source and B
	// as backdrop -- confirm against the CUDA source (file 22, lines 584-609).
	.loc	6	201	0
	ld.param.f32 	%f641, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f641, %f8;
	mov.f32 	%f642, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f643, %f135, %f642;
	mov.f32 	%f644, 0f00000000;   	// 0
	// aA - 8e-6 <= 0: A contributes nothing, keep B.
	setp.le.ftz.f32 	%p65, %f643, %f644;
	@!%p65 bra 	$Lt_124_283394;
	.loc	22	609	0
	mov.f32 	%f645, %f9;
	mov.f32 	%f646, %f10;
	mov.f32 	%f647, %f11;
	mov.f32 	%f648, %f12;
	bra.uni 	$Lt_124_284162;
$Lt_124_283394:
	mov.f32 	%f649, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f650, %f12, %f649;
	mov.f32 	%f651, 0f00000000;   	// 0
	// aB - 8e-6 <= 0: B contributes nothing, take A's channels with alpha aA.
	setp.le.ftz.f32 	%p66, %f650, %f651;
	@!%p66 bra 	$Lt_124_283906;
	mov.f32 	%f645, %f5;
	mov.f32 	%f646, %f6;
	mov.f32 	%f647, %f7;
	mov.f32 	%f648, %f135;
	bra.uni 	$Lt_124_284162;
$Lt_124_283906:
	// Choose the luma coefficient table: %r16 <= 720 uses the 601 table,
	// %r16 > 720 uses the 709 table.
	// NOTE(review): %r16 is set outside this view; presumably an image
	// dimension used as an SD/HD switch -- confirm.
	mov.u32 	%r116, 720;
	setp.gt.s32 	%p67, %r16, %r116;
	@%p67 bra 	$Lt_124_284418;
	.loc	22	584	0
	// luma(p) = saturate(k[0]*p.ch2 + k[1]*p.ch1 + k[2]*p.ch0), where k[] are
	// the first three floats of kRGB32f_To_601YPbPr and ch0..ch2 are the
	// pixel's first three registers (%f5/%f6/%f7 for A, %f9/%f10/%f11 for B).
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f652, %f6, %f471;
	fma.rn.ftz.f32 	%f653, %f470, %f7, %f652;
	fma.rn.ftz.f32 	%f654, %f469, %f5, %f653;
	cvt.ftz.sat.f32.f32 	%f655, %f654;
	mul.ftz.f32 	%f656, %f10, %f471;
	fma.rn.ftz.f32 	%f657, %f470, %f11, %f656;
	fma.rn.ftz.f32 	%f658, %f469, %f9, %f657;
	cvt.ftz.sat.f32.f32 	%f659, %f658;
	// luma(A) > luma(B): composite A over B; otherwise keep B.
	setp.gt.ftz.f32 	%p68, %f655, %f659;
	@!%p68 bra 	$Lt_124_238338;
	.loc	22	468	0
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f660, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f660;
	mov.f32 	%f661, %f138;
	mov.f32 	%f662, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f663, %f138, %f662;
	mov.f32 	%f664, 0f00000000;   	// 0
	// Combined alpha ~ 0: emit an all-zero pixel and skip the reciprocal.
	setp.le.ftz.f32 	%p69, %f663, %f664;
	@!%p69 bra 	$Lt_124_284930;
	mov.f32 	%f665, 0f00000000;   	// 0
	mov.f32 	%f666, 0f00000000;   	// 0
	mov.f32 	%f667, 0f00000000;   	// 0
	mov.f32 	%f661, 0f00000000;   	// 0
	bra.uni 	$Lt_124_284674;
$Lt_124_284930:
	// %f147 = 1 - aB, %f149 = aA / aR, %f152 = 1 - aA / aR.
	// Per channel: out = b * %f152 + %f149 * (a * %f147 + a * aB); the inner
	// sum is a*(1 - aB) + a*aB, i.e. the blend term here is simply a.
	mov.f32 	%f668, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f668, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f669, 0f3f800000;   	// 1
	mul.ftz.f32 	%f670, %f135, %f148;
	sub.ftz.f32 	%f152, %f669, %f670;
	mul.ftz.f32 	%f671, %f147, %f5;
	fma.rn.ftz.f32 	%f672, %f5, %f12, %f671;
	mul.ftz.f32 	%f673, %f149, %f672;
	fma.rn.ftz.f32 	%f667, %f9, %f152, %f673;
	mul.ftz.f32 	%f674, %f147, %f6;
	fma.rn.ftz.f32 	%f675, %f6, %f12, %f674;
	mul.ftz.f32 	%f676, %f149, %f675;
	fma.rn.ftz.f32 	%f666, %f10, %f152, %f676;
	mul.ftz.f32 	%f677, %f147, %f7;
	fma.rn.ftz.f32 	%f678, %f7, %f12, %f677;
	mul.ftz.f32 	%f679, %f149, %f678;
	fma.rn.ftz.f32 	%f665, %f11, %f152, %f679;
$Lt_124_284674:
	.loc	22	586	0
	mov.f32 	%f680, %f667;
	mov.f32 	%f681, %f666;
	mov.f32 	%f682, %f665;
	mov.f32 	%f683, %f661;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_301_50;
$Lt_124_238338:
	// luma(A) <= luma(B) under the 601 table: keep B.
	.loc	22	590	0
	mov.f32 	%f680, %f9;
	mov.f32 	%f681, %f10;
	mov.f32 	%f682, %f11;
	mov.f32 	%f683, %f12;
$LDWendi__Z10GetLuma6018PixelRGB_301_50:
	.loc	22	609	0
	mov.f32 	%f645, %f680;
	mov.f32 	%f646, %f681;
	mov.f32 	%f647, %f682;
	mov.f32 	%f648, %f683;
	bra.uni 	$Lt_124_284162;
$Lt_124_284418:
	// Same comparison and compositing as the 601 path above, but with the
	// coefficients read from kRGB32f_To_709YPbPr.
	.loc	22	598	0
	ld.const.f32 	%f684, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f685, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f686, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f687, %f6, %f686;
	fma.rn.ftz.f32 	%f688, %f685, %f7, %f687;
	fma.rn.ftz.f32 	%f689, %f684, %f5, %f688;
	cvt.ftz.sat.f32.f32 	%f690, %f689;
	mul.ftz.f32 	%f691, %f10, %f686;
	fma.rn.ftz.f32 	%f692, %f685, %f11, %f691;
	fma.rn.ftz.f32 	%f693, %f684, %f9, %f692;
	cvt.ftz.sat.f32.f32 	%f694, %f693;
	// luma(A) > luma(B): composite A over B; otherwise keep B.
	setp.gt.ftz.f32 	%p70, %f690, %f694;
	@!%p70 bra 	$Lt_124_238850;
	.loc	22	468	0
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f695, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f695;
	mov.f32 	%f696, %f138;
	mov.f32 	%f697, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f698, %f138, %f697;
	mov.f32 	%f699, 0f00000000;   	// 0
	// Combined alpha ~ 0: emit an all-zero pixel and skip the reciprocal.
	setp.le.ftz.f32 	%p71, %f698, %f699;
	@!%p71 bra 	$Lt_124_285442;
	mov.f32 	%f700, 0f00000000;   	// 0
	mov.f32 	%f701, 0f00000000;   	// 0
	mov.f32 	%f702, 0f00000000;   	// 0
	mov.f32 	%f696, 0f00000000;   	// 0
	bra.uni 	$Lt_124_285186;
$Lt_124_285442:
	mov.f32 	%f703, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f703, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f704, 0f3f800000;   	// 1
	mul.ftz.f32 	%f705, %f135, %f148;
	sub.ftz.f32 	%f152, %f704, %f705;
	mul.ftz.f32 	%f706, %f147, %f5;
	fma.rn.ftz.f32 	%f707, %f5, %f12, %f706;
	mul.ftz.f32 	%f708, %f149, %f707;
	fma.rn.ftz.f32 	%f702, %f9, %f152, %f708;
	mul.ftz.f32 	%f709, %f147, %f6;
	fma.rn.ftz.f32 	%f710, %f6, %f12, %f709;
	mul.ftz.f32 	%f711, %f149, %f710;
	fma.rn.ftz.f32 	%f701, %f10, %f152, %f711;
	mul.ftz.f32 	%f712, %f147, %f7;
	fma.rn.ftz.f32 	%f713, %f7, %f12, %f712;
	mul.ftz.f32 	%f714, %f149, %f713;
	fma.rn.ftz.f32 	%f700, %f11, %f152, %f714;
$Lt_124_285186:
	.loc	22	600	0
	mov.f32 	%f715, %f702;
	mov.f32 	%f716, %f701;
	mov.f32 	%f717, %f700;
	mov.f32 	%f718, %f696;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_301_48;
$Lt_124_238850:
	// luma(A) <= luma(B) under the 709 table: keep B.
	.loc	22	604	0
	mov.f32 	%f715, %f9;
	mov.f32 	%f716, %f10;
	mov.f32 	%f717, %f11;
	mov.f32 	%f718, %f12;
$LDWendi__Z10GetLuma7098PixelRGB_301_48:
	.loc	22	609	0
	mov.f32 	%f645, %f715;
	mov.f32 	%f646, %f716;
	mov.f32 	%f647, %f717;
	mov.f32 	%f648, %f718;
$Lt_124_284162:
$Lt_124_283650:
$Lt_124_283138:
	// Join of every path above: commit the selected/composited pixel.
	.loc	6	201	0
	mov.f32 	%f5, %f645;
	mov.f32 	%f6, %f646;
	mov.f32 	%f7, %f647;
	mov.f32 	%f8, %f648;
	bra.uni 	$Lt_124_313346;
$Lt_124_3330:
	// Case arm: composite pixel A = (%f5, %f6, %f7, %f8) with pixel
	// B = (%f9, %f10, %f11, %f12) using a piecewise per-channel blend term.
	// With a' = clamp(a, 0, 1) and b' = clamp(b, 0, 1):
	//   b' <= 0.5 : blend = b' * (2 * a')
	//   b' >  0.5 : blend = 1 - (2 * (1 - a')) * (1 - b')
	// The blend term is clamped to [0, 1] and the result written to %f5..%f8.
	// NOTE(review): this matches the usual "overlay" blend (switch keyed on the
	// backdrop) with A as source and B as backdrop -- confirm against the CUDA
	// source (file 22, lines 373 and 477).
	.loc	22	477	0
	ld.param.f32 	%f719, [__cudaparm_TranslateKernel_inAlphaGain];
	// %f135 = aA = inAlphaGain * A.alpha
	mul.ftz.f32 	%f135, %f719, %f8;
	// %f138 = aR = aA + aB - aA*aB  (combined alpha; also the output alpha)
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f720, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f720;
	mov.f32 	%f721, %f138;
	mov.f32 	%f722, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f723, %f138, %f722;
	mov.f32 	%f724, 0f00000000;   	// 0
	// Combined alpha is (within 8e-6 of) zero: emit an all-zero pixel and skip
	// the reciprocal below.
	setp.le.ftz.f32 	%p72, %f723, %f724;
	@!%p72 bra 	$Lt_124_285954;
	mov.f32 	%f725, 0f00000000;   	// 0
	mov.f32 	%f726, 0f00000000;   	// 0
	mov.f32 	%f727, 0f00000000;   	// 0
	mov.f32 	%f721, 0f00000000;   	// 0
	bra.uni 	$Lt_124_285698;
$Lt_124_285954:
	.loc	22	373	0
	// Channel 0 blend term: b' = %f733 (from %f9), a' = %f735 (from %f5).
	mov.f32 	%f728, 0f00000000;   	// 0
	max.ftz.f32 	%f729, %f9, %f728;
	mov.f32 	%f730, 0f00000000;   	// 0
	max.ftz.f32 	%f731, %f5, %f730;
	mov.f32 	%f732, 0f3f800000;   	// 1
	min.ftz.f32 	%f733, %f729, %f732;
	mov.f32 	%f734, 0f3f800000;   	// 1
	min.ftz.f32 	%f735, %f731, %f734;
	mov.f32 	%f736, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p73, %f733, %f736;
	@!%p73 bra 	$Lt_124_286466;
	// b' <= 0.5: %f738 = b' * 2a'
	add.ftz.f32 	%f737, %f735, %f735;
	mul.ftz.f32 	%f738, %f733, %f737;
	bra.uni 	$Lt_124_286210;
$Lt_124_286466:
	// b' > 0.5: %f738 = 1 - 2(1 - a') * (1 - b')
	mov.f32 	%f739, 0f3f800000;   	// 1
	sub.ftz.f32 	%f740, %f739, %f735;
	mov.f32 	%f741, 0f3f800000;   	// 1
	add.ftz.f32 	%f742, %f740, %f740;
	mov.f32 	%f743, 0f3f800000;   	// 1
	sub.ftz.f32 	%f744, %f743, %f733;
	mul.ftz.f32 	%f745, %f742, %f744;
	sub.ftz.f32 	%f738, %f741, %f745;
$Lt_124_286210:
	.loc	22	477	0
	// Shared factors for all three channels:
	//   %f147 = 1 - aB
	//   %f149 = aA / aR          (via approximate reciprocal %f148 = 1/aR)
	//   %f152 = 1 - aA / aR
	// Each channel then evaluates
	//   out = b * %f152 + %f149 * (a * %f147 + aB * clamp(blend, 0, 1))
	mov.f32 	%f746, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f746, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f747, 0f3f800000;   	// 1
	mul.ftz.f32 	%f748, %f135, %f148;
	sub.ftz.f32 	%f152, %f747, %f748;
	mov.f32 	%f749, 0f00000000;   	// 0
	max.ftz.f32 	%f750, %f738, %f749;
	mov.f32 	%f751, 0f3f800000;   	// 1
	min.ftz.f32 	%f752, %f750, %f751;
	mul.ftz.f32 	%f753, %f12, %f752;
	fma.rn.ftz.f32 	%f754, %f5, %f147, %f753;
	mul.ftz.f32 	%f755, %f149, %f754;
	fma.rn.ftz.f32 	%f727, %f9, %f152, %f755;
	.loc	22	373	0
	// Channel 1 blend term: b' = %f761 (from %f10), a' = %f763 (from %f6).
	mov.f32 	%f756, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f10, %f756;
	mov.f32 	%f758, 0f00000000;   	// 0
	max.ftz.f32 	%f759, %f6, %f758;
	mov.f32 	%f760, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f760;
	mov.f32 	%f762, 0f3f800000;   	// 1
	min.ftz.f32 	%f763, %f759, %f762;
	mov.f32 	%f764, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p74, %f761, %f764;
	@!%p74 bra 	$Lt_124_286978;
	add.ftz.f32 	%f765, %f763, %f763;
	mul.ftz.f32 	%f766, %f761, %f765;
	bra.uni 	$Lt_124_286722;
$Lt_124_286978:
	mov.f32 	%f767, 0f3f800000;   	// 1
	sub.ftz.f32 	%f768, %f767, %f763;
	mov.f32 	%f769, 0f3f800000;   	// 1
	add.ftz.f32 	%f770, %f768, %f768;
	mov.f32 	%f771, 0f3f800000;   	// 1
	sub.ftz.f32 	%f772, %f771, %f761;
	mul.ftz.f32 	%f773, %f770, %f772;
	sub.ftz.f32 	%f766, %f769, %f773;
$Lt_124_286722:
	.loc	22	477	0
	// Channel 1 composite -> %f726 (reuses %f147 / %f149 / %f152 from above).
	mov.f32 	%f774, 0f00000000;   	// 0
	max.ftz.f32 	%f775, %f766, %f774;
	mov.f32 	%f776, 0f3f800000;   	// 1
	min.ftz.f32 	%f777, %f775, %f776;
	mul.ftz.f32 	%f778, %f12, %f777;
	fma.rn.ftz.f32 	%f779, %f6, %f147, %f778;
	mul.ftz.f32 	%f780, %f149, %f779;
	fma.rn.ftz.f32 	%f726, %f10, %f152, %f780;
	.loc	22	373	0
	// Channel 2 blend term: b' = %f786 (from %f11), a' = %f788 (from %f7).
	mov.f32 	%f781, 0f00000000;   	// 0
	max.ftz.f32 	%f782, %f11, %f781;
	mov.f32 	%f783, 0f00000000;   	// 0
	max.ftz.f32 	%f784, %f7, %f783;
	mov.f32 	%f785, 0f3f800000;   	// 1
	min.ftz.f32 	%f786, %f782, %f785;
	mov.f32 	%f787, 0f3f800000;   	// 1
	min.ftz.f32 	%f788, %f784, %f787;
	mov.f32 	%f789, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p75, %f786, %f789;
	@!%p75 bra 	$Lt_124_287490;
	add.ftz.f32 	%f790, %f788, %f788;
	mul.ftz.f32 	%f791, %f786, %f790;
	bra.uni 	$Lt_124_287234;
$Lt_124_287490:
	mov.f32 	%f792, 0f3f800000;   	// 1
	sub.ftz.f32 	%f793, %f792, %f788;
	mov.f32 	%f794, 0f3f800000;   	// 1
	add.ftz.f32 	%f795, %f793, %f793;
	mov.f32 	%f796, 0f3f800000;   	// 1
	sub.ftz.f32 	%f797, %f796, %f786;
	mul.ftz.f32 	%f798, %f795, %f797;
	sub.ftz.f32 	%f791, %f794, %f798;
$Lt_124_287234:
	.loc	22	477	0
	// Channel 2 composite -> %f725 (reuses %f147 / %f149 / %f152 from above).
	mov.f32 	%f799, 0f00000000;   	// 0
	max.ftz.f32 	%f800, %f791, %f799;
	mov.f32 	%f801, 0f3f800000;   	// 1
	min.ftz.f32 	%f802, %f800, %f801;
	mul.ftz.f32 	%f803, %f12, %f802;
	fma.rn.ftz.f32 	%f804, %f7, %f147, %f803;
	mul.ftz.f32 	%f805, %f149, %f804;
	fma.rn.ftz.f32 	%f725, %f11, %f152, %f805;
$Lt_124_285698:
	// Commit the result (three channels + combined alpha) to %f5..%f8.
	.loc	6	202	0
	mov.f32 	%f5, %f727;
	mov.f32 	%f6, %f726;
	mov.f32 	%f7, %f725;
	mov.f32 	%f8, %f721;
	bra.uni 	$Lt_124_313346;
$Lt_124_3586:
	// Case body tagged with debug line 478 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function below resembles a "soft light"
	// blend -- confirm against line 380 of file 22.
	.loc	22	478	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f806, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f806, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f807, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f807;
	mov.f32 	%f808, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f809, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f810, %f138, %f809;
	mov.f32 	%f811, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p76, %f810, %f811;
	@!%p76 bra 	$Lt_124_288002;
	mov.f32 	%f812, 0f00000000;   	// 0
	mov.f32 	%f813, 0f00000000;   	// 0
	mov.f32 	%f814, 0f00000000;   	// 0
	mov.f32 	%f808, 0f00000000;   	// 0
	bra.uni 	$Lt_124_287746;
$Lt_124_288002:
	.loc	22	380	0
	// First channel pair: p = clamp(%f9, 0, 1), q = clamp(%f5, 0, 1).
	mov.f32 	%f815, 0f00000000;   	// 0
	max.ftz.f32 	%f729, %f9, %f815;
	mov.f32 	%f816, 0f00000000;   	// 0
	max.ftz.f32 	%f731, %f5, %f816;
	mov.f32 	%f817, 0f3f800000;   	// 1
	min.ftz.f32 	%f733, %f729, %f817;
	mov.f32 	%f818, 0f3f800000;   	// 1
	min.ftz.f32 	%f735, %f731, %f818;
	// %f821 = 2q - 1
	add.ftz.f32 	%f819, %f735, %f735;
	mov.f32 	%f820, 0fbf800000;   	// -1
	add.ftz.f32 	%f821, %f819, %f820;
	mov.f32 	%f822, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p77, %f735, %f822;
	@!%p77 bra 	$Lt_124_288514;
	// q <= 0.5: B = (2q - 1) * (p - p*p) + p
	mul.ftz.f32 	%f823, %f733, %f733;
	sub.ftz.f32 	%f824, %f733, %f823;
	fma.rn.ftz.f32 	%f825, %f821, %f824, %f733;
	bra.uni 	$Lt_124_288258;
$Lt_124_288514:
	// q > 0.5: B = (2q - 1) * (sqrt(p) - p) + p
	sqrt.approx.ftz.f32 	%f826, %f733;
	sub.ftz.f32 	%f827, %f826, %f733;
	fma.rn.ftz.f32 	%f825, %f821, %f827, %f733;
$Lt_124_288258:
	.loc	22	478	0
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f828, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f828, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f829, 0f3f800000;   	// 1
	mul.ftz.f32 	%f830, %f135, %f148;
	sub.ftz.f32 	%f152, %f829, %f830;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * clamp(B, 0, 1))
	mov.f32 	%f831, 0f00000000;   	// 0
	max.ftz.f32 	%f832, %f825, %f831;
	mov.f32 	%f833, 0f3f800000;   	// 1
	min.ftz.f32 	%f834, %f832, %f833;
	mul.ftz.f32 	%f835, %f12, %f834;
	fma.rn.ftz.f32 	%f836, %f5, %f147, %f835;
	mul.ftz.f32 	%f837, %f149, %f836;
	fma.rn.ftz.f32 	%f814, %f9, %f152, %f837;
	.loc	22	380	0
	// Second channel pair: same computation with %f10 / %f6.
	mov.f32 	%f838, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f10, %f838;
	mov.f32 	%f839, 0f00000000;   	// 0
	max.ftz.f32 	%f759, %f6, %f839;
	mov.f32 	%f840, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f840;
	mov.f32 	%f841, 0f3f800000;   	// 1
	min.ftz.f32 	%f763, %f759, %f841;
	add.ftz.f32 	%f842, %f763, %f763;
	mov.f32 	%f843, 0fbf800000;   	// -1
	add.ftz.f32 	%f844, %f842, %f843;
	mov.f32 	%f845, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p78, %f763, %f845;
	@!%p78 bra 	$Lt_124_289026;
	mul.ftz.f32 	%f846, %f761, %f761;
	sub.ftz.f32 	%f847, %f761, %f846;
	fma.rn.ftz.f32 	%f848, %f844, %f847, %f761;
	bra.uni 	$Lt_124_288770;
$Lt_124_289026:
	sqrt.approx.ftz.f32 	%f849, %f761;
	sub.ftz.f32 	%f850, %f849, %f761;
	fma.rn.ftz.f32 	%f848, %f844, %f850, %f761;
$Lt_124_288770:
	.loc	22	478	0
	// Composite the second channel with the shared weights.
	mov.f32 	%f851, 0f00000000;   	// 0
	max.ftz.f32 	%f852, %f848, %f851;
	mov.f32 	%f853, 0f3f800000;   	// 1
	min.ftz.f32 	%f854, %f852, %f853;
	mul.ftz.f32 	%f855, %f12, %f854;
	fma.rn.ftz.f32 	%f856, %f6, %f147, %f855;
	mul.ftz.f32 	%f857, %f149, %f856;
	fma.rn.ftz.f32 	%f813, %f10, %f152, %f857;
	.loc	22	380	0
	// Third channel pair: same computation with %f11 / %f7.
	mov.f32 	%f858, 0f00000000;   	// 0
	max.ftz.f32 	%f782, %f11, %f858;
	mov.f32 	%f859, 0f00000000;   	// 0
	max.ftz.f32 	%f784, %f7, %f859;
	mov.f32 	%f860, 0f3f800000;   	// 1
	min.ftz.f32 	%f786, %f782, %f860;
	mov.f32 	%f861, 0f3f800000;   	// 1
	min.ftz.f32 	%f788, %f784, %f861;
	add.ftz.f32 	%f862, %f788, %f788;
	mov.f32 	%f863, 0fbf800000;   	// -1
	add.ftz.f32 	%f864, %f862, %f863;
	mov.f32 	%f865, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p79, %f788, %f865;
	@!%p79 bra 	$Lt_124_289538;
	mul.ftz.f32 	%f866, %f786, %f786;
	sub.ftz.f32 	%f867, %f786, %f866;
	fma.rn.ftz.f32 	%f868, %f864, %f867, %f786;
	bra.uni 	$Lt_124_289282;
$Lt_124_289538:
	sqrt.approx.ftz.f32 	%f869, %f786;
	sub.ftz.f32 	%f870, %f869, %f786;
	fma.rn.ftz.f32 	%f868, %f864, %f870, %f786;
$Lt_124_289282:
	.loc	22	478	0
	// Composite the third channel with the shared weights.
	mov.f32 	%f871, 0f00000000;   	// 0
	max.ftz.f32 	%f872, %f868, %f871;
	mov.f32 	%f873, 0f3f800000;   	// 1
	min.ftz.f32 	%f874, %f872, %f873;
	mul.ftz.f32 	%f875, %f12, %f874;
	fma.rn.ftz.f32 	%f876, %f7, %f147, %f875;
	mul.ftz.f32 	%f877, %f149, %f876;
	fma.rn.ftz.f32 	%f812, %f11, %f152, %f877;
$Lt_124_287746:
	.loc	6	203	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f814;
	mov.f32 	%f6, %f813;
	mov.f32 	%f7, %f812;
	mov.f32 	%f8, %f808;
	bra.uni 	$Lt_124_313346;
$Lt_124_3842:
	// Case body tagged with debug line 479 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function below resembles a "hard light" /
	// "overlay" style blend -- confirm against line 386 of file 22.
	.loc	22	479	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f878, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f878, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f879, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f879;
	mov.f32 	%f880, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f881, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f882, %f138, %f881;
	mov.f32 	%f883, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p80, %f882, %f883;
	@!%p80 bra 	$Lt_124_290050;
	mov.f32 	%f884, 0f00000000;   	// 0
	mov.f32 	%f885, 0f00000000;   	// 0
	mov.f32 	%f886, 0f00000000;   	// 0
	mov.f32 	%f880, 0f00000000;   	// 0
	bra.uni 	$Lt_124_289794;
$Lt_124_290050:
	.loc	22	386	0
	// First channel pair: p = clamp(%f9, 0, 1), q = clamp(%f5, 0, 1).
	mov.f32 	%f887, 0f00000000;   	// 0
	max.ftz.f32 	%f729, %f9, %f887;
	mov.f32 	%f888, 0f00000000;   	// 0
	max.ftz.f32 	%f731, %f5, %f888;
	mov.f32 	%f889, 0f3f800000;   	// 1
	min.ftz.f32 	%f733, %f729, %f889;
	mov.f32 	%f890, 0f3f800000;   	// 1
	min.ftz.f32 	%f735, %f731, %f890;
	mov.f32 	%f891, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p81, %f735, %f891;
	@!%p81 bra 	$Lt_124_290562;
	// q <= 0.5: B = p * 2q
	add.ftz.f32 	%f892, %f735, %f735;
	mul.ftz.f32 	%f893, %f733, %f892;
	bra.uni 	$Lt_124_290306;
$Lt_124_290562:
	// q > 0.5: B = 1 - 2 * (1 - q) * (1 - p)
	mov.f32 	%f894, 0f3f800000;   	// 1
	sub.ftz.f32 	%f895, %f894, %f735;
	mov.f32 	%f896, 0f3f800000;   	// 1
	add.ftz.f32 	%f897, %f895, %f895;
	mov.f32 	%f898, 0f3f800000;   	// 1
	sub.ftz.f32 	%f899, %f898, %f733;
	mul.ftz.f32 	%f900, %f897, %f899;
	sub.ftz.f32 	%f893, %f896, %f900;
$Lt_124_290306:
	.loc	22	479	0
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f901, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f901, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f902, 0f3f800000;   	// 1
	mul.ftz.f32 	%f903, %f135, %f148;
	sub.ftz.f32 	%f152, %f902, %f903;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * clamp(B, 0, 1))
	mov.f32 	%f904, 0f00000000;   	// 0
	max.ftz.f32 	%f905, %f893, %f904;
	mov.f32 	%f906, 0f3f800000;   	// 1
	min.ftz.f32 	%f907, %f905, %f906;
	mul.ftz.f32 	%f908, %f12, %f907;
	fma.rn.ftz.f32 	%f909, %f5, %f147, %f908;
	mul.ftz.f32 	%f910, %f149, %f909;
	fma.rn.ftz.f32 	%f886, %f9, %f152, %f910;
	.loc	22	386	0
	// Second channel pair: same computation with %f10 / %f6.
	mov.f32 	%f911, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f10, %f911;
	mov.f32 	%f912, 0f00000000;   	// 0
	max.ftz.f32 	%f759, %f6, %f912;
	mov.f32 	%f913, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f913;
	mov.f32 	%f914, 0f3f800000;   	// 1
	min.ftz.f32 	%f763, %f759, %f914;
	mov.f32 	%f915, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p82, %f763, %f915;
	@!%p82 bra 	$Lt_124_291074;
	add.ftz.f32 	%f916, %f763, %f763;
	mul.ftz.f32 	%f917, %f761, %f916;
	bra.uni 	$Lt_124_290818;
$Lt_124_291074:
	mov.f32 	%f918, 0f3f800000;   	// 1
	sub.ftz.f32 	%f919, %f918, %f763;
	mov.f32 	%f920, 0f3f800000;   	// 1
	add.ftz.f32 	%f921, %f919, %f919;
	mov.f32 	%f922, 0f3f800000;   	// 1
	sub.ftz.f32 	%f923, %f922, %f761;
	mul.ftz.f32 	%f924, %f921, %f923;
	sub.ftz.f32 	%f917, %f920, %f924;
$Lt_124_290818:
	.loc	22	479	0
	// Composite the second channel with the shared weights.
	mov.f32 	%f925, 0f00000000;   	// 0
	max.ftz.f32 	%f926, %f917, %f925;
	mov.f32 	%f927, 0f3f800000;   	// 1
	min.ftz.f32 	%f928, %f926, %f927;
	mul.ftz.f32 	%f929, %f12, %f928;
	fma.rn.ftz.f32 	%f930, %f6, %f147, %f929;
	mul.ftz.f32 	%f931, %f149, %f930;
	fma.rn.ftz.f32 	%f885, %f10, %f152, %f931;
	.loc	22	386	0
	// Third channel pair: same computation with %f11 / %f7.
	mov.f32 	%f932, 0f00000000;   	// 0
	max.ftz.f32 	%f782, %f11, %f932;
	mov.f32 	%f933, 0f00000000;   	// 0
	max.ftz.f32 	%f784, %f7, %f933;
	mov.f32 	%f934, 0f3f800000;   	// 1
	min.ftz.f32 	%f786, %f782, %f934;
	mov.f32 	%f935, 0f3f800000;   	// 1
	min.ftz.f32 	%f788, %f784, %f935;
	mov.f32 	%f936, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p83, %f788, %f936;
	@!%p83 bra 	$Lt_124_291586;
	add.ftz.f32 	%f937, %f788, %f788;
	mul.ftz.f32 	%f938, %f786, %f937;
	bra.uni 	$Lt_124_291330;
$Lt_124_291586:
	mov.f32 	%f939, 0f3f800000;   	// 1
	sub.ftz.f32 	%f940, %f939, %f788;
	mov.f32 	%f941, 0f3f800000;   	// 1
	add.ftz.f32 	%f942, %f940, %f940;
	mov.f32 	%f943, 0f3f800000;   	// 1
	sub.ftz.f32 	%f944, %f943, %f786;
	mul.ftz.f32 	%f945, %f942, %f944;
	sub.ftz.f32 	%f938, %f941, %f945;
$Lt_124_291330:
	.loc	22	479	0
	// Composite the third channel with the shared weights.
	mov.f32 	%f946, 0f00000000;   	// 0
	max.ftz.f32 	%f947, %f938, %f946;
	mov.f32 	%f948, 0f3f800000;   	// 1
	min.ftz.f32 	%f949, %f947, %f948;
	mul.ftz.f32 	%f950, %f12, %f949;
	fma.rn.ftz.f32 	%f951, %f7, %f147, %f950;
	mul.ftz.f32 	%f952, %f149, %f951;
	fma.rn.ftz.f32 	%f884, %f11, %f152, %f952;
$Lt_124_289794:
	.loc	6	204	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f886;
	mov.f32 	%f6, %f885;
	mov.f32 	%f7, %f884;
	mov.f32 	%f8, %f880;
	bra.uni 	$Lt_124_313346;
$Lt_124_4098:
	// Case body tagged with debug line 480 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function below resembles a "vivid light"
	// (burn/dodge split at 0.5) blend -- confirm against lines 431-437 of
	// file 22.
	.loc	22	480	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f953, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f953, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f954, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f954;
	mov.f32 	%f955, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f956, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f957, %f138, %f956;
	mov.f32 	%f958, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p84, %f957, %f958;
	@!%p84 bra 	$Lt_124_292098;
	mov.f32 	%f959, 0f00000000;   	// 0
	mov.f32 	%f960, 0f00000000;   	// 0
	mov.f32 	%f961, 0f00000000;   	// 0
	mov.f32 	%f955, 0f00000000;   	// 0
	bra.uni 	$Lt_124_291842;
$Lt_124_292098:
	.loc	22	431	0
	// First channel pair: p = clamp(%f9, 0, 1),
	// q = clamp(%f5, 1e-6, 0.999999). The narrowed range for q keeps both
	// divisors below (2q and 2(1 - q)) strictly positive.
	mov.f32 	%f962, 0f00000000;   	// 0
	max.ftz.f32 	%f729, %f9, %f962;
	mov.f32 	%f963, 0f358637bd;   	// 1e-006
	max.ftz.f32 	%f964, %f5, %f963;
	mov.f32 	%f965, 0f3f800000;   	// 1
	min.ftz.f32 	%f733, %f729, %f965;
	mov.f32 	%f966, 0f3f7fffef;   	// 0.999999
	min.ftz.f32 	%f967, %f964, %f966;
	mov.f32 	%f968, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p85, %f967, %f968;
	@!%p85 bra 	$Lt_124_242690;
	.loc	22	433	0
	// q <= 0.5: B = clamp(1 - (1 - p) / (2q), 0, 1)
	mov.f32 	%f969, 0f3f800000;   	// 1
	mov.f32 	%f970, 0f3f800000;   	// 1
	sub.ftz.f32 	%f971, %f970, %f733;
	add.ftz.f32 	%f972, %f967, %f967;
	div.approx.ftz.f32 	%f973, %f971, %f972;
	sub.ftz.f32 	%f974, %f969, %f973;
	mov.f32 	%f975, 0f00000000;   	// 0
	max.ftz.f32 	%f976, %f974, %f975;
	mov.f32 	%f977, 0f3f800000;   	// 1
	min.ftz.f32 	%f978, %f976, %f977;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_46;
$Lt_124_242690:
	.loc	22	437	0
	// q > 0.5: B = clamp(p / (2 * (1 - q)), 0, 1)
	mov.f32 	%f979, 0f3f800000;   	// 1
	sub.ftz.f32 	%f980, %f979, %f967;
	add.ftz.f32 	%f981, %f980, %f980;
	div.approx.ftz.f32 	%f982, %f733, %f981;
	mov.f32 	%f983, 0f00000000;   	// 0
	max.ftz.f32 	%f984, %f982, %f983;
	mov.f32 	%f985, 0f3f800000;   	// 1
	min.ftz.f32 	%f978, %f984, %f985;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_46:
	.loc	22	480	0
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f986, 0f3f800000;   	// 1
	sub.ftz.f32 	%f147, %f986, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f987, 0f3f800000;   	// 1
	mul.ftz.f32 	%f988, %f135, %f148;
	sub.ftz.f32 	%f152, %f987, %f988;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	// (B was already clamped to [0, 1] in both branches above).
	mul.ftz.f32 	%f989, %f978, %f12;
	fma.rn.ftz.f32 	%f990, %f5, %f147, %f989;
	mul.ftz.f32 	%f991, %f149, %f990;
	fma.rn.ftz.f32 	%f961, %f9, %f152, %f991;
	.loc	22	431	0
	// Second channel pair: same computation with %f10 / %f6.
	mov.f32 	%f992, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f10, %f992;
	mov.f32 	%f993, 0f358637bd;   	// 1e-006
	max.ftz.f32 	%f994, %f6, %f993;
	mov.f32 	%f995, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f995;
	mov.f32 	%f996, 0f3f7fffef;   	// 0.999999
	min.ftz.f32 	%f997, %f994, %f996;
	mov.f32 	%f998, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p86, %f997, %f998;
	@!%p86 bra 	$Lt_124_242946;
	.loc	22	433	0
	mov.f32 	%f999, 0f3f800000;   	// 1
	mov.f32 	%f1000, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1001, %f1000, %f761;
	add.ftz.f32 	%f1002, %f997, %f997;
	div.approx.ftz.f32 	%f1003, %f1001, %f1002;
	sub.ftz.f32 	%f1004, %f999, %f1003;
	mov.f32 	%f1005, 0f00000000;  	// 0
	max.ftz.f32 	%f1006, %f1004, %f1005;
	mov.f32 	%f1007, 0f3f800000;  	// 1
	min.ftz.f32 	%f1008, %f1006, %f1007;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_44;
$Lt_124_242946:
	.loc	22	437	0
	mov.f32 	%f1009, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1010, %f1009, %f997;
	add.ftz.f32 	%f1011, %f1010, %f1010;
	div.approx.ftz.f32 	%f1012, %f761, %f1011;
	mov.f32 	%f1013, 0f00000000;  	// 0
	max.ftz.f32 	%f1014, %f1012, %f1013;
	mov.f32 	%f1015, 0f3f800000;  	// 1
	min.ftz.f32 	%f1008, %f1014, %f1015;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_44:
	.loc	22	480	0
	// Composite the second channel with the shared weights.
	mul.ftz.f32 	%f1016, %f1008, %f12;
	fma.rn.ftz.f32 	%f1017, %f6, %f147, %f1016;
	mul.ftz.f32 	%f1018, %f149, %f1017;
	fma.rn.ftz.f32 	%f960, %f10, %f152, %f1018;
	.loc	22	431	0
	// Third channel pair: same computation with %f11 / %f7.
	mov.f32 	%f1019, 0f00000000;  	// 0
	max.ftz.f32 	%f782, %f11, %f1019;
	mov.f32 	%f1020, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1021, %f7, %f1020;
	mov.f32 	%f1022, 0f3f800000;  	// 1
	min.ftz.f32 	%f786, %f782, %f1022;
	mov.f32 	%f1023, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1024, %f1021, %f1023;
	mov.f32 	%f1025, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p87, %f1024, %f1025;
	@!%p87 bra 	$Lt_124_243202;
	.loc	22	433	0
	mov.f32 	%f1026, 0f3f800000;  	// 1
	mov.f32 	%f1027, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1028, %f1027, %f786;
	add.ftz.f32 	%f1029, %f1024, %f1024;
	div.approx.ftz.f32 	%f1030, %f1028, %f1029;
	sub.ftz.f32 	%f1031, %f1026, %f1030;
	mov.f32 	%f1032, 0f00000000;  	// 0
	max.ftz.f32 	%f1033, %f1031, %f1032;
	mov.f32 	%f1034, 0f3f800000;  	// 1
	min.ftz.f32 	%f1035, %f1033, %f1034;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_42;
$Lt_124_243202:
	.loc	22	437	0
	mov.f32 	%f1036, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1037, %f1036, %f1024;
	add.ftz.f32 	%f1038, %f1037, %f1037;
	div.approx.ftz.f32 	%f1039, %f786, %f1038;
	mov.f32 	%f1040, 0f00000000;  	// 0
	max.ftz.f32 	%f1041, %f1039, %f1040;
	mov.f32 	%f1042, 0f3f800000;  	// 1
	min.ftz.f32 	%f1035, %f1041, %f1042;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_42:
	.loc	22	480	0
	// Composite the third channel with the shared weights.
	mul.ftz.f32 	%f1043, %f1035, %f12;
	fma.rn.ftz.f32 	%f1044, %f7, %f147, %f1043;
	mul.ftz.f32 	%f1045, %f149, %f1044;
	fma.rn.ftz.f32 	%f959, %f11, %f152, %f1045;
$Lt_124_291842:
	.loc	6	205	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f961;
	mov.f32 	%f6, %f960;
	mov.f32 	%f7, %f959;
	mov.f32 	%f8, %f955;
	bra.uni 	$Lt_124_313346;
$Lt_124_4354:
	// Case body tagged with debug line 481 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function below resembles a "linear light"
	// blend -- confirm against the C source.
	.loc	22	481	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1046, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1046, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1047, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1047;
	mov.f32 	%f1048, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1049, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1050, %f138, %f1049;
	mov.f32 	%f1051, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p88, %f1050, %f1051;
	@!%p88 bra 	$Lt_124_292610;
	mov.f32 	%f1052, 0f00000000;  	// 0
	mov.f32 	%f1053, 0f00000000;  	// 0
	mov.f32 	%f1054, 0f00000000;  	// 0
	mov.f32 	%f1048, 0f00000000;  	// 0
	bra.uni 	$Lt_124_292354;
$Lt_124_292610:
	// %f147 = 1 - %f12
	mov.f32 	%f1055, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1055, %f12;
	// q = clamp(%f5, 0, 1)
	mov.f32 	%f1056, 0f00000000;  	// 0
	max.ftz.f32 	%f731, %f5, %f1056;
	mov.f32 	%f1057, 0f3f800000;  	// 1
	min.ftz.f32 	%f735, %f731, %f1057;
	// %f149 = a / A, %f152 = 1 - a / A (1/A from the approximate reciprocal)
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1058, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1059, %f135, %f148;
	sub.ftz.f32 	%f152, %f1058, %f1059;
	// First channel: B = 2q + clamp(%f9, 0, 1) - 1. B itself is used
	// unclamped in the composite below.
	add.ftz.f32 	%f1060, %f735, %f735;
	mov.f32 	%f1061, 0f00000000;  	// 0
	max.ftz.f32 	%f1062, %f9, %f1061;
	mov.f32 	%f1063, 0f3f800000;  	// 1
	min.ftz.f32 	%f1064, %f1062, %f1063;
	add.ftz.f32 	%f1065, %f1060, %f1064;
	mov.f32 	%f1066, 0fbf800000;  	// -1
	add.ftz.f32 	%f1067, %f1065, %f1066;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1068, %f12, %f1067;
	fma.rn.ftz.f32 	%f1069, %f5, %f147, %f1068;
	mul.ftz.f32 	%f1070, %f149, %f1069;
	fma.rn.ftz.f32 	%f1054, %f9, %f152, %f1070;
	// Second channel: same computation with %f10 / %f6.
	mov.f32 	%f1071, 0f00000000;  	// 0
	max.ftz.f32 	%f759, %f6, %f1071;
	mov.f32 	%f1072, 0f3f800000;  	// 1
	min.ftz.f32 	%f763, %f759, %f1072;
	add.ftz.f32 	%f1073, %f763, %f763;
	mov.f32 	%f1074, 0f00000000;  	// 0
	max.ftz.f32 	%f1075, %f10, %f1074;
	mov.f32 	%f1076, 0f3f800000;  	// 1
	min.ftz.f32 	%f1077, %f1075, %f1076;
	add.ftz.f32 	%f1078, %f1073, %f1077;
	mov.f32 	%f1079, 0fbf800000;  	// -1
	add.ftz.f32 	%f1080, %f1078, %f1079;
	mul.ftz.f32 	%f1081, %f12, %f1080;
	fma.rn.ftz.f32 	%f1082, %f6, %f147, %f1081;
	mul.ftz.f32 	%f1083, %f149, %f1082;
	fma.rn.ftz.f32 	%f1053, %f10, %f152, %f1083;
	// Third channel: same computation with %f11 / %f7.
	mov.f32 	%f1084, 0f00000000;  	// 0
	max.ftz.f32 	%f784, %f7, %f1084;
	mov.f32 	%f1085, 0f3f800000;  	// 1
	min.ftz.f32 	%f788, %f784, %f1085;
	add.ftz.f32 	%f1086, %f788, %f788;
	mov.f32 	%f1087, 0f00000000;  	// 0
	max.ftz.f32 	%f1088, %f11, %f1087;
	mov.f32 	%f1089, 0f3f800000;  	// 1
	min.ftz.f32 	%f1090, %f1088, %f1089;
	add.ftz.f32 	%f1091, %f1086, %f1090;
	mov.f32 	%f1092, 0fbf800000;  	// -1
	add.ftz.f32 	%f1093, %f1091, %f1092;
	mul.ftz.f32 	%f1094, %f12, %f1093;
	fma.rn.ftz.f32 	%f1095, %f7, %f147, %f1094;
	mul.ftz.f32 	%f1096, %f149, %f1095;
	fma.rn.ftz.f32 	%f1052, %f11, %f152, %f1096;
$Lt_124_292354:
	.loc	6	206	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1054;
	mov.f32 	%f6, %f1053;
	mov.f32 	%f7, %f1052;
	mov.f32 	%f8, %f1048;
	bra.uni 	$Lt_124_313346;
$Lt_124_4610:
	// Case body tagged with debug line 482 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function below resembles a "pin light"
	// blend -- confirm against lines 450-460 of file 22.
	.loc	22	482	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1097, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1097, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1098, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1098;
	mov.f32 	%f1099, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1100, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1101, %f138, %f1100;
	mov.f32 	%f1102, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p89, %f1101, %f1102;
	@!%p89 bra 	$Lt_124_293122;
	mov.f32 	%f1103, 0f00000000;  	// 0
	mov.f32 	%f1104, 0f00000000;  	// 0
	mov.f32 	%f1105, 0f00000000;  	// 0
	mov.f32 	%f1099, 0f00000000;  	// 0
	bra.uni 	$Lt_124_292866;
$Lt_124_293122:
	.loc	22	450	0
	// First channel pair: p = clamp(%f9, 0, 1), q = clamp(%f5, 0, 1),
	// %f1110 = 2q, %f1112 = 2q - 1.
	mov.f32 	%f1106, 0f00000000;  	// 0
	max.ftz.f32 	%f729, %f9, %f1106;
	mov.f32 	%f1107, 0f00000000;  	// 0
	max.ftz.f32 	%f731, %f5, %f1107;
	mov.f32 	%f1108, 0f3f800000;  	// 1
	min.ftz.f32 	%f733, %f729, %f1108;
	mov.f32 	%f1109, 0f3f800000;  	// 1
	min.ftz.f32 	%f735, %f731, %f1109;
	add.ftz.f32 	%f1110, %f735, %f735;
	mov.f32 	%f1111, 0fbf800000;  	// -1
	add.ftz.f32 	%f1112, %f1110, %f1111;
	// Three-way select: B = 2q - 1 if that exceeds p ...
	setp.gt.ftz.f32 	%p90, %f1112, %f733;
	@!%p90 bra 	$Lt_124_243970;
	.loc	22	452	0
	mov.f32 	%f1113, %f1112;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_40;
$Lt_124_243970:
	.loc	22	454	0
	// ... else B = 2q if that is below p ...
	setp.lt.ftz.f32 	%p91, %f1110, %f733;
	@!%p91 bra 	$Lt_124_244226;
	.loc	22	456	0
	mov.f32 	%f1113, %f1110;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_40;
$Lt_124_244226:
	.loc	22	460	0
	// ... otherwise B = p.
	mov.f32 	%f1113, %f733;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_40:
	.loc	22	482	0
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f1114, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1114, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1115, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1116, %f135, %f148;
	sub.ftz.f32 	%f152, %f1115, %f1116;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1117, %f1113, %f12;
	fma.rn.ftz.f32 	%f1118, %f5, %f147, %f1117;
	mul.ftz.f32 	%f1119, %f149, %f1118;
	fma.rn.ftz.f32 	%f1105, %f9, %f152, %f1119;
	.loc	22	450	0
	// Second channel pair: same computation with %f10 / %f6.
	mov.f32 	%f1120, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f10, %f1120;
	mov.f32 	%f1121, 0f00000000;  	// 0
	max.ftz.f32 	%f759, %f6, %f1121;
	mov.f32 	%f1122, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1122;
	mov.f32 	%f1123, 0f3f800000;  	// 1
	min.ftz.f32 	%f763, %f759, %f1123;
	add.ftz.f32 	%f1124, %f763, %f763;
	mov.f32 	%f1125, 0fbf800000;  	// -1
	add.ftz.f32 	%f1126, %f1124, %f1125;
	setp.gt.ftz.f32 	%p92, %f1126, %f761;
	@!%p92 bra 	$Lt_124_244482;
	.loc	22	452	0
	mov.f32 	%f1127, %f1126;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_38;
$Lt_124_244482:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p93, %f1124, %f761;
	@!%p93 bra 	$Lt_124_244738;
	.loc	22	456	0
	mov.f32 	%f1127, %f1124;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_38;
$Lt_124_244738:
	.loc	22	460	0
	mov.f32 	%f1127, %f761;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_38:
	.loc	22	482	0
	// Composite the second channel with the shared weights.
	mul.ftz.f32 	%f1128, %f1127, %f12;
	fma.rn.ftz.f32 	%f1129, %f6, %f147, %f1128;
	mul.ftz.f32 	%f1130, %f149, %f1129;
	fma.rn.ftz.f32 	%f1104, %f10, %f152, %f1130;
	.loc	22	450	0
	// Third channel pair: same computation with %f11 / %f7.
	mov.f32 	%f1131, 0f00000000;  	// 0
	max.ftz.f32 	%f782, %f11, %f1131;
	mov.f32 	%f1132, 0f00000000;  	// 0
	max.ftz.f32 	%f784, %f7, %f1132;
	mov.f32 	%f1133, 0f3f800000;  	// 1
	min.ftz.f32 	%f786, %f782, %f1133;
	mov.f32 	%f1134, 0f3f800000;  	// 1
	min.ftz.f32 	%f788, %f784, %f1134;
	add.ftz.f32 	%f1135, %f788, %f788;
	mov.f32 	%f1136, 0fbf800000;  	// -1
	add.ftz.f32 	%f1137, %f1135, %f1136;
	setp.gt.ftz.f32 	%p94, %f1137, %f786;
	@!%p94 bra 	$Lt_124_244994;
	.loc	22	452	0
	mov.f32 	%f1138, %f1137;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_36;
$Lt_124_244994:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p95, %f1135, %f786;
	@!%p95 bra 	$Lt_124_245250;
	.loc	22	456	0
	mov.f32 	%f1138, %f1135;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__301_36;
$Lt_124_245250:
	.loc	22	460	0
	mov.f32 	%f1138, %f786;
$LDWendi__Z5ClampIfET_S0_S0_S0__301_36:
	.loc	22	482	0
	// Composite the third channel with the shared weights.
	mul.ftz.f32 	%f1139, %f1138, %f12;
	fma.rn.ftz.f32 	%f1140, %f7, %f147, %f1139;
	mul.ftz.f32 	%f1141, %f149, %f1140;
	fma.rn.ftz.f32 	%f1103, %f11, %f152, %f1141;
$Lt_124_292866:
	.loc	6	207	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1105;
	mov.f32 	%f6, %f1104;
	mov.f32 	%f7, %f1103;
	mov.f32 	%f8, %f1099;
	bra.uni 	$Lt_124_313346;
$Lt_124_4866:
	// Case body tagged with debug line 483 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel 0/1 threshold below resembles a
	// "hard mix" blend -- confirm against the C source.
	.loc	22	483	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1142, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1142, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1143, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1143;
	mov.f32 	%f1144, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1145, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1146, %f138, %f1145;
	mov.f32 	%f1147, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p96, %f1146, %f1147;
	@!%p96 bra 	$Lt_124_293634;
	mov.f32 	%f1148, 0f00000000;  	// 0
	mov.f32 	%f1149, 0f00000000;  	// 0
	mov.f32 	%f1150, 0f00000000;  	// 0
	mov.f32 	%f1144, 0f00000000;  	// 0
	bra.uni 	$Lt_124_293378;
$Lt_124_293634:
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f1151, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1151, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1152, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1153, %f135, %f148;
	sub.ftz.f32 	%f152, %f1152, %f1153;
	// First channel: B = (%f5 < 1 - %f9) ? 0 : 1. The inputs are compared
	// as-is; no [0, 1] clamp is applied in this case.
	mov.f32 	%f1154, 0f00000000;  	// 0
	mov.f32 	%f1155, 0f3f800000;  	// 1
	mov.f32 	%f1156, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1157, %f1156, %f9;
	setp.lt.ftz.f32 	%p97, %f5, %f1157;
	selp.f32 	%f1158, %f1154, %f1155, %p97;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1159, %f1158, %f12;
	fma.rn.ftz.f32 	%f1160, %f5, %f147, %f1159;
	mul.ftz.f32 	%f1161, %f149, %f1160;
	fma.rn.ftz.f32 	%f1150, %f9, %f152, %f1161;
	// Second channel: same computation with %f10 / %f6.
	mov.f32 	%f1162, 0f00000000;  	// 0
	mov.f32 	%f1163, 0f3f800000;  	// 1
	mov.f32 	%f1164, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1165, %f1164, %f10;
	setp.lt.ftz.f32 	%p98, %f6, %f1165;
	selp.f32 	%f1166, %f1162, %f1163, %p98;
	mul.ftz.f32 	%f1167, %f1166, %f12;
	fma.rn.ftz.f32 	%f1168, %f6, %f147, %f1167;
	mul.ftz.f32 	%f1169, %f149, %f1168;
	fma.rn.ftz.f32 	%f1149, %f10, %f152, %f1169;
	// Third channel: same computation with %f11 / %f7.
	mov.f32 	%f1170, 0f00000000;  	// 0
	mov.f32 	%f1171, 0f3f800000;  	// 1
	mov.f32 	%f1172, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1173, %f1172, %f11;
	setp.lt.ftz.f32 	%p99, %f7, %f1173;
	selp.f32 	%f1174, %f1170, %f1171, %p99;
	mul.ftz.f32 	%f1175, %f1174, %f12;
	fma.rn.ftz.f32 	%f1176, %f7, %f147, %f1175;
	mul.ftz.f32 	%f1177, %f149, %f1176;
	fma.rn.ftz.f32 	%f1148, %f11, %f152, %f1177;
$Lt_124_293378:
	.loc	6	208	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1150;
	mov.f32 	%f6, %f1149;
	mov.f32 	%f7, %f1148;
	mov.f32 	%f8, %f1144;
	bra.uni 	$Lt_124_313346;
$Lt_124_5122:
	// Case body tagged with debug line 484 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel absolute difference below resembles a
	// "difference" blend -- confirm against the C source.
	.loc	22	484	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1178, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1178, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1179, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1179;
	mov.f32 	%f1180, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1181, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1182, %f138, %f1181;
	mov.f32 	%f1183, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p100, %f1182, %f1183;
	@!%p100 bra 	$Lt_124_294146;
	mov.f32 	%f1184, 0f00000000;  	// 0
	mov.f32 	%f1185, 0f00000000;  	// 0
	mov.f32 	%f1186, 0f00000000;  	// 0
	mov.f32 	%f1180, 0f00000000;  	// 0
	bra.uni 	$Lt_124_293890;
$Lt_124_294146:
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f1187, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1187, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1188, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1189, %f135, %f148;
	sub.ftz.f32 	%f152, %f1188, %f1189;
	// First channel: B = |%f5 - %f9| (inputs and B are not clamped here).
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	sub.ftz.f32 	%f1190, %f5, %f9;
	abs.ftz.f32 	%f1191, %f1190;
	mul.ftz.f32 	%f1192, %f12, %f1191;
	fma.rn.ftz.f32 	%f1193, %f5, %f147, %f1192;
	mul.ftz.f32 	%f1194, %f149, %f1193;
	fma.rn.ftz.f32 	%f1186, %f9, %f152, %f1194;
	// Second channel: same computation with %f10 / %f6.
	sub.ftz.f32 	%f1195, %f6, %f10;
	abs.ftz.f32 	%f1196, %f1195;
	mul.ftz.f32 	%f1197, %f12, %f1196;
	fma.rn.ftz.f32 	%f1198, %f6, %f147, %f1197;
	mul.ftz.f32 	%f1199, %f149, %f1198;
	fma.rn.ftz.f32 	%f1185, %f10, %f152, %f1199;
	// Third channel: same computation with %f11 / %f7.
	sub.ftz.f32 	%f1200, %f7, %f11;
	abs.ftz.f32 	%f1201, %f1200;
	mul.ftz.f32 	%f1202, %f12, %f1201;
	fma.rn.ftz.f32 	%f1203, %f7, %f147, %f1202;
	mul.ftz.f32 	%f1204, %f149, %f1203;
	fma.rn.ftz.f32 	%f1184, %f11, %f152, %f1204;
$Lt_124_293890:
	.loc	6	209	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1186;
	mov.f32 	%f6, %f1185;
	mov.f32 	%f7, %f1184;
	mov.f32 	%f8, %f1180;
	bra.uni 	$Lt_124_313346;
$Lt_124_5378:
	// Case body tagged with debug line 485 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel function p + q - 2pq below resembles an
	// "exclusion" blend -- confirm against the C source.
	.loc	22	485	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1205, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1205, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1206, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1206;
	mov.f32 	%f1207, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1208, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1209, %f138, %f1208;
	mov.f32 	%f1210, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p101, %f1209, %f1210;
	@!%p101 bra 	$Lt_124_294658;
	mov.f32 	%f1211, 0f00000000;  	// 0
	mov.f32 	%f1212, 0f00000000;  	// 0
	mov.f32 	%f1213, 0f00000000;  	// 0
	mov.f32 	%f1207, 0f00000000;  	// 0
	bra.uni 	$Lt_124_294402;
$Lt_124_294658:
	// %f147 = 1 - %f12
	mov.f32 	%f1214, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1214, %f12;
	// First channel pair: p = clamp(%f9, 0, 1), q = clamp(%f5, 0, 1).
	mov.f32 	%f1215, 0f00000000;  	// 0
	max.ftz.f32 	%f729, %f9, %f1215;
	mov.f32 	%f1216, 0f00000000;  	// 0
	max.ftz.f32 	%f731, %f5, %f1216;
	mov.f32 	%f1217, 0f3f800000;  	// 1
	min.ftz.f32 	%f733, %f729, %f1217;
	mov.f32 	%f1218, 0f3f800000;  	// 1
	min.ftz.f32 	%f735, %f731, %f1218;
	// %f149 = a / A, %f152 = 1 - a / A (1/A from the approximate reciprocal)
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1219, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1220, %f135, %f148;
	sub.ftz.f32 	%f152, %f1219, %f1220;
	// B = clamp((q + p) - p * 2q, 0, 1)
	add.ftz.f32 	%f1221, %f735, %f733;
	add.ftz.f32 	%f1222, %f735, %f735;
	mul.ftz.f32 	%f1223, %f733, %f1222;
	sub.ftz.f32 	%f1224, %f1221, %f1223;
	mov.f32 	%f1225, 0f00000000;  	// 0
	max.ftz.f32 	%f1226, %f1224, %f1225;
	mov.f32 	%f1227, 0f3f800000;  	// 1
	min.ftz.f32 	%f1228, %f1226, %f1227;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1229, %f12, %f1228;
	fma.rn.ftz.f32 	%f1230, %f5, %f147, %f1229;
	mul.ftz.f32 	%f1231, %f149, %f1230;
	fma.rn.ftz.f32 	%f1213, %f9, %f152, %f1231;
	// Second channel pair: same computation with %f10 / %f6.
	mov.f32 	%f1232, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f10, %f1232;
	mov.f32 	%f1233, 0f00000000;  	// 0
	max.ftz.f32 	%f759, %f6, %f1233;
	mov.f32 	%f1234, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1234;
	mov.f32 	%f1235, 0f3f800000;  	// 1
	min.ftz.f32 	%f763, %f759, %f1235;
	add.ftz.f32 	%f1236, %f763, %f761;
	add.ftz.f32 	%f1237, %f763, %f763;
	mul.ftz.f32 	%f1238, %f761, %f1237;
	sub.ftz.f32 	%f1239, %f1236, %f1238;
	mov.f32 	%f1240, 0f00000000;  	// 0
	max.ftz.f32 	%f1241, %f1239, %f1240;
	mov.f32 	%f1242, 0f3f800000;  	// 1
	min.ftz.f32 	%f1243, %f1241, %f1242;
	mul.ftz.f32 	%f1244, %f12, %f1243;
	fma.rn.ftz.f32 	%f1245, %f6, %f147, %f1244;
	mul.ftz.f32 	%f1246, %f149, %f1245;
	fma.rn.ftz.f32 	%f1212, %f10, %f152, %f1246;
	// Third channel pair: same computation with %f11 / %f7.
	mov.f32 	%f1247, 0f00000000;  	// 0
	max.ftz.f32 	%f782, %f11, %f1247;
	mov.f32 	%f1248, 0f00000000;  	// 0
	max.ftz.f32 	%f784, %f7, %f1248;
	mov.f32 	%f1249, 0f3f800000;  	// 1
	min.ftz.f32 	%f786, %f782, %f1249;
	mov.f32 	%f1250, 0f3f800000;  	// 1
	min.ftz.f32 	%f788, %f784, %f1250;
	add.ftz.f32 	%f1251, %f788, %f786;
	add.ftz.f32 	%f1252, %f788, %f788;
	mul.ftz.f32 	%f1253, %f786, %f1252;
	sub.ftz.f32 	%f1254, %f1251, %f1253;
	mov.f32 	%f1255, 0f00000000;  	// 0
	max.ftz.f32 	%f1256, %f1254, %f1255;
	mov.f32 	%f1257, 0f3f800000;  	// 1
	min.ftz.f32 	%f1258, %f1256, %f1257;
	mul.ftz.f32 	%f1259, %f12, %f1258;
	fma.rn.ftz.f32 	%f1260, %f7, %f147, %f1259;
	mul.ftz.f32 	%f1261, %f149, %f1260;
	fma.rn.ftz.f32 	%f1211, %f11, %f152, %f1261;
$Lt_124_294402:
	.loc	6	210	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1213;
	mov.f32 	%f6, %f1212;
	mov.f32 	%f7, %f1211;
	mov.f32 	%f8, %f1207;
	bra.uni 	$Lt_124_313346;
$Lt_124_5634:
	// Case body tagged with debug line 486 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel clamped difference below resembles a
	// "subtract" blend -- confirm against the C source.
	.loc	22	486	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1262, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1262, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1263, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1263;
	mov.f32 	%f1264, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1265, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1266, %f138, %f1265;
	mov.f32 	%f1267, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p102, %f1266, %f1267;
	@!%p102 bra 	$Lt_124_295170;
	mov.f32 	%f1268, 0f00000000;  	// 0
	mov.f32 	%f1269, 0f00000000;  	// 0
	mov.f32 	%f1270, 0f00000000;  	// 0
	mov.f32 	%f1264, 0f00000000;  	// 0
	bra.uni 	$Lt_124_294914;
$Lt_124_295170:
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f1271, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1271, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1272, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1273, %f135, %f148;
	sub.ftz.f32 	%f152, %f1272, %f1273;
	// First channel: B = clamp(clamp(%f9, 0, 1) - clamp(%f5, 0, 1), 0, 1)
	mov.f32 	%f1274, 0f00000000;  	// 0
	max.ftz.f32 	%f1275, %f9, %f1274;
	mov.f32 	%f1276, 0f3f800000;  	// 1
	min.ftz.f32 	%f1277, %f1275, %f1276;
	mov.f32 	%f1278, 0f00000000;  	// 0
	max.ftz.f32 	%f1279, %f5, %f1278;
	mov.f32 	%f1280, 0f3f800000;  	// 1
	min.ftz.f32 	%f1281, %f1279, %f1280;
	sub.ftz.f32 	%f1282, %f1277, %f1281;
	mov.f32 	%f1283, 0f00000000;  	// 0
	max.ftz.f32 	%f1284, %f1282, %f1283;
	mov.f32 	%f1285, 0f3f800000;  	// 1
	min.ftz.f32 	%f1286, %f1284, %f1285;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1287, %f12, %f1286;
	fma.rn.ftz.f32 	%f1288, %f5, %f147, %f1287;
	mul.ftz.f32 	%f1289, %f149, %f1288;
	fma.rn.ftz.f32 	%f1270, %f9, %f152, %f1289;
	// Second channel: same computation with %f10 / %f6.
	mov.f32 	%f1290, 0f00000000;  	// 0
	max.ftz.f32 	%f1291, %f10, %f1290;
	mov.f32 	%f1292, 0f3f800000;  	// 1
	min.ftz.f32 	%f1293, %f1291, %f1292;
	mov.f32 	%f1294, 0f00000000;  	// 0
	max.ftz.f32 	%f1295, %f6, %f1294;
	mov.f32 	%f1296, 0f3f800000;  	// 1
	min.ftz.f32 	%f1297, %f1295, %f1296;
	sub.ftz.f32 	%f1298, %f1293, %f1297;
	mov.f32 	%f1299, 0f00000000;  	// 0
	max.ftz.f32 	%f1300, %f1298, %f1299;
	mov.f32 	%f1301, 0f3f800000;  	// 1
	min.ftz.f32 	%f1302, %f1300, %f1301;
	mul.ftz.f32 	%f1303, %f12, %f1302;
	fma.rn.ftz.f32 	%f1304, %f6, %f147, %f1303;
	mul.ftz.f32 	%f1305, %f149, %f1304;
	fma.rn.ftz.f32 	%f1269, %f10, %f152, %f1305;
	// Third channel: same computation with %f11 / %f7.
	mov.f32 	%f1306, 0f00000000;  	// 0
	max.ftz.f32 	%f1307, %f11, %f1306;
	mov.f32 	%f1308, 0f3f800000;  	// 1
	min.ftz.f32 	%f1309, %f1307, %f1308;
	mov.f32 	%f1310, 0f00000000;  	// 0
	max.ftz.f32 	%f1311, %f7, %f1310;
	mov.f32 	%f1312, 0f3f800000;  	// 1
	min.ftz.f32 	%f1313, %f1311, %f1312;
	sub.ftz.f32 	%f1314, %f1309, %f1313;
	mov.f32 	%f1315, 0f00000000;  	// 0
	max.ftz.f32 	%f1316, %f1314, %f1315;
	mov.f32 	%f1317, 0f3f800000;  	// 1
	min.ftz.f32 	%f1318, %f1316, %f1317;
	mul.ftz.f32 	%f1319, %f12, %f1318;
	fma.rn.ftz.f32 	%f1320, %f7, %f147, %f1319;
	mul.ftz.f32 	%f1321, %f149, %f1320;
	fma.rn.ftz.f32 	%f1268, %f11, %f152, %f1321;
$Lt_124_294914:
	.loc	6	211	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1270;
	mov.f32 	%f6, %f1269;
	mov.f32 	%f7, %f1268;
	mov.f32 	%f8, %f1264;
	bra.uni 	$Lt_124_313346;
$Lt_124_5890:
	// Case body tagged with debug line 487 of file index 22.
	// Reads %f5..%f8 and %f9..%f12, writes new values to %f5..%f8, then
	// branches to $Lt_124_313346.
	// NOTE(review): %f8/%f12 look like alpha and %f5..%f7 / %f9..%f11 like
	// colour channels of two pixels -- confirm against the C source.
	// NOTE(review): the per-channel clamped quotient below resembles a
	// "divide" blend -- confirm against the C source.
	.loc	22	487	0
	// a = inAlphaGain * %f8 ; A = a + %f12 - a * %f12
	ld.param.f32 	%f1322, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1322, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1323, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1323;
	mov.f32 	%f1324, %f138;
	// If A - 8e-6 <= 0, skip the blend and output zero in all four
	// components (this also keeps rcp(A) below away from ~0).
	mov.f32 	%f1325, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1326, %f138, %f1325;
	mov.f32 	%f1327, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p103, %f1326, %f1327;
	@!%p103 bra 	$Lt_124_295682;
	mov.f32 	%f1328, 0f00000000;  	// 0
	mov.f32 	%f1329, 0f00000000;  	// 0
	mov.f32 	%f1330, 0f00000000;  	// 0
	mov.f32 	%f1324, 0f00000000;  	// 0
	bra.uni 	$Lt_124_295426;
$Lt_124_295682:
	// Weights shared by all three channels:
	//   %f147 = 1 - %f12, %f149 = a / A, %f152 = 1 - a / A
	// (1/A comes from the approximate reciprocal).
	mov.f32 	%f1331, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1331, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1332, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1333, %f135, %f148;
	sub.ftz.f32 	%f152, %f1332, %f1333;
	// First channel: B = clamp(clamp(%f9, 0, 1) / clamp(%f5, 1e-7, 1), 0, 1).
	// The 1e-7 lower bound keeps the divisor strictly positive.
	mov.f32 	%f1334, 0f00000000;  	// 0
	max.ftz.f32 	%f1335, %f9, %f1334;
	mov.f32 	%f1336, 0f3f800000;  	// 1
	min.ftz.f32 	%f1337, %f1335, %f1336;
	mov.f32 	%f1338, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1339, %f5, %f1338;
	mov.f32 	%f1340, 0f3f800000;  	// 1
	min.ftz.f32 	%f1341, %f1339, %f1340;
	div.approx.ftz.f32 	%f1342, %f1337, %f1341;
	mov.f32 	%f1343, 0f00000000;  	// 0
	max.ftz.f32 	%f1344, %f1342, %f1343;
	mov.f32 	%f1345, 0f3f800000;  	// 1
	min.ftz.f32 	%f1346, %f1344, %f1345;
	// out = %f9 * (1 - a/A) + (a/A) * (%f5 * (1 - %f12) + %f12 * B)
	mul.ftz.f32 	%f1347, %f12, %f1346;
	fma.rn.ftz.f32 	%f1348, %f5, %f147, %f1347;
	mul.ftz.f32 	%f1349, %f149, %f1348;
	fma.rn.ftz.f32 	%f1330, %f9, %f152, %f1349;
	// Second channel: same computation with %f10 / %f6.
	mov.f32 	%f1350, 0f00000000;  	// 0
	max.ftz.f32 	%f1351, %f10, %f1350;
	mov.f32 	%f1352, 0f3f800000;  	// 1
	min.ftz.f32 	%f1353, %f1351, %f1352;
	mov.f32 	%f1354, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1355, %f6, %f1354;
	mov.f32 	%f1356, 0f3f800000;  	// 1
	min.ftz.f32 	%f1357, %f1355, %f1356;
	div.approx.ftz.f32 	%f1358, %f1353, %f1357;
	mov.f32 	%f1359, 0f00000000;  	// 0
	max.ftz.f32 	%f1360, %f1358, %f1359;
	mov.f32 	%f1361, 0f3f800000;  	// 1
	min.ftz.f32 	%f1362, %f1360, %f1361;
	mul.ftz.f32 	%f1363, %f12, %f1362;
	fma.rn.ftz.f32 	%f1364, %f6, %f147, %f1363;
	mul.ftz.f32 	%f1365, %f149, %f1364;
	fma.rn.ftz.f32 	%f1329, %f10, %f152, %f1365;
	// Third channel: same computation with %f11 / %f7.
	mov.f32 	%f1366, 0f00000000;  	// 0
	max.ftz.f32 	%f1367, %f11, %f1366;
	mov.f32 	%f1368, 0f3f800000;  	// 1
	min.ftz.f32 	%f1369, %f1367, %f1368;
	mov.f32 	%f1370, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1371, %f7, %f1370;
	mov.f32 	%f1372, 0f3f800000;  	// 1
	min.ftz.f32 	%f1373, %f1371, %f1372;
	div.approx.ftz.f32 	%f1374, %f1369, %f1373;
	mov.f32 	%f1375, 0f00000000;  	// 0
	max.ftz.f32 	%f1376, %f1374, %f1375;
	mov.f32 	%f1377, 0f3f800000;  	// 1
	min.ftz.f32 	%f1378, %f1376, %f1377;
	mul.ftz.f32 	%f1379, %f12, %f1378;
	fma.rn.ftz.f32 	%f1380, %f7, %f147, %f1379;
	mul.ftz.f32 	%f1381, %f149, %f1380;
	fma.rn.ftz.f32 	%f1328, %f11, %f152, %f1381;
$Lt_124_295426:
	.loc	6	212	0
	// Join point of the zero path and the blend path: publish the results
	// into %f5..%f8 (%f8 receives A, or 0 on the zero path).
	mov.f32 	%f5, %f1330;
	mov.f32 	%f6, %f1329;
	mov.f32 	%f7, %f1328;
	mov.f32 	%f8, %f1324;
	bra.uni 	$Lt_124_313346;
// Blend arm $Lt_124_6146 (source lines 22:154-240, 22:113-127, 22:468; result at 6:213).
// Inputs are two colour triples, (%f5,%f6,%f7) and (%f9,%f10,%f11), plus %f8 and %f12
// which are used as alphas in the final composite. All are defined outside this view.
// Steps:
//   1. Rescale the (%f5,%f6,%f7) triple so its min becomes 0 and its max becomes
//      s = saturate(max - min) of the (%f9,%f10,%f11) triple.  Results land in
//      %f1395 (the %f5 slot), %f1396 (the %f6 slot) and %f1400 (the %f7 slot).
//   2. Shift the rescaled triple so its weighted sum matches that of (%f9,%f10,%f11),
//      then clip it back into [0,1] around that weighted sum.
//   3. Composite the result with (%f9,%f10,%f11) using the alphas and write %f5..%f8.
// NOTE(review): steps 1-2 have the same shape as SetSat/SetLum/ClipColor from the
// PDF / W3C non-separable blend modes -- confirm against the CUDA source.
$Lt_124_6146:
	.loc	22	154	0
	// %f1384 = max(%f5,%f6,%f7), %f1385 = min(%f5,%f6,%f7); %p104 = (%f5 < %f6).
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	// Dispatch on which channel is the minimum / maximum.
	setp.eq.ftz.f32 	%p106, %f1385, %f7;
	@!%p106 bra 	$Lt_124_296194;
	// %f7 is the minimum.
	setp.eq.ftz.f32 	%p107, %f1384, %f6;
	@!%p107 bra 	$Lt_124_296706;
	// %f6 is the maximum; the strict compare guards the division below against max == min.
	setp.gt.ftz.f32 	%p108, %f6, %f7;
	@!%p108 bra 	$Lt_124_297218;
	.loc	22	161	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f5 slot) = s * (%f5 - %f7) / (%f6 - %f7).
	sub.ftz.f32 	%f1392, %f5, %f7;
	mul.ftz.f32 	%f1393, %f1391, %f1392;
	sub.ftz.f32 	%f1394, %f6, %f7;
	div.approx.ftz.f32 	%f1395, %f1393, %f1394;
	.loc	22	162	0
	// Maximum channel (%f6 slot) = s.
	mov.f32 	%f1396, %f1391;
	bra.uni 	$Lt_124_297474;
$Lt_124_297218:
	.loc	22	166	0
	// max is not greater than min: both non-minimum slots become 0.
	mov.f32 	%f1395, 0f00000000;  	// 0
	mov.f32 	%f1396, 0f00000000;  	// 0
	bra.uni 	$Lt_124_297474;
$Lt_124_296706:
	// %f7 is the minimum and %f6 is not the maximum; %f5 is treated as the maximum.
	setp.gt.ftz.f32 	%p111, %f5, %f7;
	@!%p111 bra 	$Lt_124_297730;
	.loc	22	173	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f6 slot) = s * (%f6 - %f7) / (%f5 - %f7).
	sub.ftz.f32 	%f1397, %f6, %f7;
	mul.ftz.f32 	%f1398, %f1391, %f1397;
	sub.ftz.f32 	%f1399, %f5, %f7;
	div.approx.ftz.f32 	%f1396, %f1398, %f1399;
	.loc	22	174	0
	// Maximum channel (%f5 slot) = s.
	mov.f32 	%f1395, %f1391;
	bra.uni 	$Lt_124_297474;
$Lt_124_297730:
	.loc	22	178	0
	mov.f32 	%f1395, 0f00000000;  	// 0
	mov.f32 	%f1396, 0f00000000;  	// 0
$Lt_124_297474:
$Lt_124_296450:
	// Minimum channel (%f7 slot) = 0.
	mov.f32 	%f1400, 0f00000000;  	// 0
	bra.uni 	$Lt_124_300034;
$Lt_124_296194:
	// %f7 is not the minimum.  %p112 = (%f6 is min), %p113 = (%f7 is max).
	setp.eq.ftz.f32 	%p112, %f1385, %f6;
	setp.eq.ftz.f32 	%p113, %f1384, %f7;
	@!%p113 bra 	$Lt_124_298242;
	@!%p112 bra 	$Lt_124_298754;
	// %f7 is the maximum, %f6 the minimum.
	setp.lt.ftz.f32 	%p114, %f6, %f7;
	@!%p114 bra 	$Lt_124_299266;
	.loc	22	191	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f5 slot) = s * (%f5 - %f6) / (%f7 - %f6).
	sub.ftz.f32 	%f1401, %f5, %f6;
	mul.ftz.f32 	%f1402, %f1391, %f1401;
	sub.ftz.f32 	%f1403, %f7, %f6;
	div.approx.ftz.f32 	%f1395, %f1402, %f1403;
	.loc	22	192	0
	// Maximum channel (%f7 slot) = s.
	mov.f32 	%f1400, %f1391;
	bra.uni 	$Lt_124_299010;
$Lt_124_299266:
	.loc	22	196	0
	mov.f32 	%f1395, 0f00000000;  	// 0
	mov.f32 	%f1400, 0f00000000;  	// 0
$Lt_124_299010:
	// Minimum channel (%f6 slot) = 0.
	mov.f32 	%f1396, 0f00000000;  	// 0
	bra.uni 	$Lt_124_300034;
$Lt_124_298754:
	// %f7 is the maximum and %f6 is not the minimum; %f5 is treated as the minimum.
	setp.lt.ftz.f32 	%p115, %f5, %f7;
	@!%p115 bra 	$Lt_124_299778;
	.loc	22	204	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f6 slot) = s * (%f6 - %f5) / (%f7 - %f5).
	sub.ftz.f32 	%f1404, %f6, %f5;
	mul.ftz.f32 	%f1405, %f1391, %f1404;
	sub.ftz.f32 	%f1406, %f7, %f5;
	div.approx.ftz.f32 	%f1396, %f1405, %f1406;
	.loc	22	205	0
	// Maximum channel (%f7 slot) = s.
	mov.f32 	%f1400, %f1391;
	bra.uni 	$Lt_124_299522;
$Lt_124_299778:
	.loc	22	209	0
	mov.f32 	%f1400, 0f00000000;  	// 0
	mov.f32 	%f1396, 0f00000000;  	// 0
$Lt_124_299522:
	.loc	22	211	0
	// Minimum channel (%f5 slot) = 0.
	mov.f32 	%f1395, 0f00000000;  	// 0
	bra.uni 	$Lt_124_300034;
$Lt_124_298242:
	// %f7 is neither the minimum nor the maximum, i.e. it is the middle channel.
	@!%p112 bra 	$Lt_124_300290;
	// %f6 is the minimum; %f5 is treated as the maximum.
	setp.gt.ftz.f32 	%p116, %f5, %f6;
	@!%p116 bra 	$Lt_124_300802;
	.loc	22	220	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f7 slot) = s * (%f7 - %f6) / (%f5 - %f6).
	sub.ftz.f32 	%f1407, %f7, %f6;
	mul.ftz.f32 	%f1408, %f1391, %f1407;
	sub.ftz.f32 	%f1409, %f5, %f6;
	div.approx.ftz.f32 	%f1400, %f1408, %f1409;
	.loc	22	221	0
	// Maximum channel (%f5 slot) = s.
	mov.f32 	%f1395, %f1391;
	bra.uni 	$Lt_124_300546;
$Lt_124_300802:
	.loc	22	225	0
	mov.f32 	%f1395, 0f00000000;  	// 0
	mov.f32 	%f1400, 0f00000000;  	// 0
$Lt_124_300546:
	// Minimum channel (%f6 slot) = 0.
	mov.f32 	%f1396, 0f00000000;  	// 0
	bra.uni 	$Lt_124_300034;
$Lt_124_300290:
	// Remaining ordering: %f5 is treated as the minimum and %f6 as the maximum.
	// %p104 (%f5 < %f6, computed at the top of the arm) guards the division.
	@!%p104 bra 	$Lt_124_301314;
	.loc	22	233	0
	// s = saturate(max - min) of (%f9,%f10,%f11) -> %f1391.
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	sub.ftz.f32 	%f1390, %f1387, %f1389;
	cvt.ftz.sat.f32.f32 	%f1391, %f1390;
	// Middle channel (%f7 slot) = s * (%f7 - %f5) / (%f6 - %f5).
	sub.ftz.f32 	%f1410, %f7, %f5;
	mul.ftz.f32 	%f1411, %f1391, %f1410;
	sub.ftz.f32 	%f1412, %f6, %f5;
	div.approx.ftz.f32 	%f1400, %f1411, %f1412;
	.loc	22	234	0
	// Maximum channel (%f6 slot) = s.
	mov.f32 	%f1396, %f1391;
	bra.uni 	$Lt_124_301058;
$Lt_124_301314:
	.loc	22	238	0
	mov.f32 	%f1400, 0f00000000;  	// 0
	mov.f32 	%f1396, 0f00000000;  	// 0
$Lt_124_301058:
	.loc	22	240	0
	// Minimum channel (%f5 slot) = 0.
	mov.f32 	%f1395, 0f00000000;  	// 0
$Lt_124_300034:
$Lt_124_297986:
$Lt_124_295938:
	.loc	22	113	0
	// Weighted sums using the first three floats of kRGB32f_To_601YPbPr
	// (presumably the luma row -- the table itself is not visible here):
	//   %f1419 = saturate(w8*%f9 + w4*%f10 + w0*%f11)
	//   %f1420 = saturate(w8*%f1395 + w4*%f1396 + w0*%f1400)
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1413, %f10, %f471;
	mul.ftz.f32 	%f1414, %f1396, %f471;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1415, %f470, %f11, %f1413;
	fma.rn.ftz.f32 	%f1416, %f470, %f1400, %f1414;
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1417, %f469, %f9, %f1415;
	fma.rn.ftz.f32 	%f1418, %f469, %f1395, %f1416;
	cvt.ftz.sat.f32.f32 	%f1419, %f1417;
	cvt.ftz.sat.f32.f32 	%f1420, %f1418;
	// Shift every rescaled channel by the difference of the two weighted sums.
	sub.ftz.f32 	%f1421, %f1419, %f1420;
	add.ftz.f32 	%f1422, %f1421, %f1395;
	mov.f32 	%f1423, %f1422;
	add.ftz.f32 	%f1424, %f1421, %f1396;
	mov.f32 	%f1425, %f1424;
	add.ftz.f32 	%f1426, %f1421, %f1400;
	mov.f32 	%f1427, %f1426;
	.loc	22	50	0
	// L = saturate(weighted sum of the shifted triple) -> %f1431.
	mul.ftz.f32 	%f1428, %f1424, %f471;
	fma.rn.ftz.f32 	%f1429, %f470, %f1426, %f1428;
	fma.rn.ftz.f32 	%f1430, %f469, %f1422, %f1429;
	cvt.ftz.sat.f32.f32 	%f1431, %f1430;
	.loc	22	116	0
	// n = min of the shifted triple -> %f1433.  If n < 0, pull each channel towards L:
	//   c = L + L * (c - L) / (L - n)
	setp.gt.ftz.f32 	%p117, %f1424, %f1422;
	selp.f32 	%f1432, %f1422, %f1424, %p117;
	setp.lt.ftz.f32 	%p118, %f1432, %f1426;
	selp.f32 	%f1433, %f1432, %f1426, %p118;
	mov.f32 	%f1434, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p119, %f1433, %f1434;
	@!%p119 bra 	$Lt_124_301570;
	.loc	22	119	0
	sub.ftz.f32 	%f1435, %f1431, %f1433;
	sub.ftz.f32 	%f1436, %f1426, %f1431;
	mul.ftz.f32 	%f1437, %f1431, %f1436;
	div.approx.ftz.f32 	%f1438, %f1437, %f1435;
	add.ftz.f32 	%f1427, %f1431, %f1438;
	.loc	22	120	0
	sub.ftz.f32 	%f1439, %f1424, %f1431;
	mul.ftz.f32 	%f1440, %f1431, %f1439;
	div.approx.ftz.f32 	%f1441, %f1440, %f1435;
	add.ftz.f32 	%f1425, %f1431, %f1441;
	.loc	22	121	0
	sub.ftz.f32 	%f1442, %f1422, %f1431;
	mul.ftz.f32 	%f1443, %f1431, %f1442;
	div.approx.ftz.f32 	%f1444, %f1443, %f1435;
	add.ftz.f32 	%f1423, %f1431, %f1444;
$Lt_124_301570:
	// x = max of the shifted triple (taken before the n < 0 adjustment) -> %f1446.
	// If x > 1, pull each (possibly already adjusted) channel towards L:
	//   c = L + (1 - L) * (c - L) / (x - L)
	max.ftz.f32 	%f1445, %f1424, %f1422;
	max.ftz.f32 	%f1446, %f1445, %f1426;
	mov.f32 	%f1447, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p120, %f1446, %f1447;
	@!%p120 bra 	$Lt_124_302082;
	.loc	27	529	0
	mov.f32 	%f1448, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1449, %f1448, %f1431;
	sub.ftz.f32 	%f1450, %f1446, %f1431;
	sub.ftz.f32 	%f1451, %f1427, %f1431;
	mul.ftz.f32 	%f1452, %f1449, %f1451;
	div.approx.ftz.f32 	%f1453, %f1452, %f1450;
	.loc	22	125	0
	add.ftz.f32 	%f1427, %f1453, %f1431;
	.loc	27	529	0
	sub.ftz.f32 	%f1454, %f1425, %f1431;
	mul.ftz.f32 	%f1455, %f1449, %f1454;
	div.approx.ftz.f32 	%f1456, %f1455, %f1450;
	.loc	22	126	0
	add.ftz.f32 	%f1425, %f1456, %f1431;
	.loc	27	529	0
	sub.ftz.f32 	%f1457, %f1423, %f1431;
	mul.ftz.f32 	%f1458, %f1449, %f1457;
	div.approx.ftz.f32 	%f1459, %f1458, %f1450;
	.loc	22	127	0
	add.ftz.f32 	%f1423, %f1459, %f1431;
$Lt_124_302082:
	.loc	22	468	0
	// Alpha composite.  a = inAlphaGain * %f8 (%f135);
	// aOut = a + %f12 - a * %f12 (%f138, copied to %f1462).
	ld.param.f32 	%f1460, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1460, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1461, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1461;
	mov.f32 	%f1462, %f138;
	// If aOut - 8e-6 <= 0 the result is fully zero (colour and alpha).
	mov.f32 	%f1463, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1464, %f138, %f1463;
	mov.f32 	%f1465, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p121, %f1464, %f1465;
	@!%p121 bra 	$Lt_124_302850;
	mov.f32 	%f1466, 0f00000000;  	// 0
	mov.f32 	%f1467, 0f00000000;  	// 0
	mov.f32 	%f1468, 0f00000000;  	// 0
	mov.f32 	%f1462, 0f00000000;  	// 0
	bra.uni 	$Lt_124_302594;
$Lt_124_302850:
	// k = a / aOut (%f149), 1 - k (%f152).  Per channel, with c the clipped blend value:
	//   out = other * (1 - k) + k * (c * (1 - %f12) + c * %f12)
	// where "other" is the matching channel of (%f9,%f10,%f11).
	mov.f32 	%f1469, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1469, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1470, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1471, %f135, %f148;
	sub.ftz.f32 	%f152, %f1470, %f1471;
	mul.ftz.f32 	%f1472, %f147, %f1423;
	fma.rn.ftz.f32 	%f1473, %f1423, %f12, %f1472;
	mul.ftz.f32 	%f1474, %f149, %f1473;
	fma.rn.ftz.f32 	%f1468, %f9, %f152, %f1474;
	mul.ftz.f32 	%f1475, %f147, %f1425;
	fma.rn.ftz.f32 	%f1476, %f1425, %f12, %f1475;
	mul.ftz.f32 	%f1477, %f149, %f1476;
	fma.rn.ftz.f32 	%f1467, %f10, %f152, %f1477;
	mul.ftz.f32 	%f1478, %f147, %f1427;
	fma.rn.ftz.f32 	%f1479, %f1427, %f12, %f1478;
	mul.ftz.f32 	%f1480, %f149, %f1479;
	fma.rn.ftz.f32 	%f1466, %f11, %f152, %f1480;
$Lt_124_302594:
	.loc	6	213	0
	// Publish the result in %f5..%f8 and jump to the shared store code.
	mov.f32 	%f5, %f1468;
	mov.f32 	%f6, %f1467;
	mov.f32 	%f7, %f1466;
	mov.f32 	%f8, %f1462;
	bra.uni 	$Lt_124_313346;
// Blend arm $Lt_124_6402 (source lines 22:154-240, 22:113-127, 22:468; result at 6:214).
// Same structure as the preceding arm with the two colour triples swapped in step 1:
//   1. Rescale the (%f9,%f10,%f11) triple so its min becomes 0 and its max becomes
//      s = saturate(max - min) of the (%f5,%f6,%f7) triple.  Results land in
//      %f1486 (the %f9 slot), %f1487 (the %f10 slot) and %f1491 (the %f11 slot).
//   2. Shift the rescaled triple so its weighted sum matches that of the original
//      (%f9,%f10,%f11), then clip it back into [0,1] around that weighted sum.
//   3. Composite the result with (%f9,%f10,%f11) using %f8 / %f12 as alphas and
//      write %f5..%f8.
// NOTE(review): steps 1-2 have the same shape as SetSat/SetLum/ClipColor from the
// PDF / W3C non-separable blend modes -- confirm against the CUDA source.
$Lt_124_6402:
	.loc	22	154	0
	// %f1387 = max(%f9,%f10,%f11), %f1389 = min(%f9,%f10,%f11); %p109 = (%f9 < %f10).
	max.ftz.f32 	%f1386, %f9, %f10;
	setp.lt.ftz.f32 	%p109, %f9, %f10;
	max.ftz.f32 	%f1387, %f1386, %f11;
	selp.f32 	%f1388, %f9, %f10, %p109;
	setp.lt.ftz.f32 	%p110, %f1388, %f11;
	selp.f32 	%f1389, %f1388, %f11, %p110;
	// Dispatch on which channel is the minimum / maximum.
	setp.eq.ftz.f32 	%p122, %f1389, %f11;
	@!%p122 bra 	$Lt_124_303362;
	// %f11 is the minimum.
	setp.eq.ftz.f32 	%p123, %f1387, %f10;
	@!%p123 bra 	$Lt_124_303874;
	// %f10 is the maximum; the strict compare guards the division below against max == min.
	setp.gt.ftz.f32 	%p124, %f10, %f11;
	@!%p124 bra 	$Lt_124_304386;
	.loc	22	161	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f9 slot) = s * (%f9 - %f11) / (%f10 - %f11).
	sub.ftz.f32 	%f1483, %f9, %f11;
	mul.ftz.f32 	%f1484, %f1482, %f1483;
	sub.ftz.f32 	%f1485, %f10, %f11;
	div.approx.ftz.f32 	%f1486, %f1484, %f1485;
	.loc	22	162	0
	// Maximum channel (%f10 slot) = s.
	mov.f32 	%f1487, %f1482;
	bra.uni 	$Lt_124_304642;
$Lt_124_304386:
	.loc	22	166	0
	// max is not greater than min: both non-minimum slots become 0.
	mov.f32 	%f1486, 0f00000000;  	// 0
	mov.f32 	%f1487, 0f00000000;  	// 0
	bra.uni 	$Lt_124_304642;
$Lt_124_303874:
	// %f11 is the minimum and %f10 is not the maximum; %f9 is treated as the maximum.
	setp.gt.ftz.f32 	%p125, %f9, %f11;
	@!%p125 bra 	$Lt_124_304898;
	.loc	22	173	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f10 slot) = s * (%f10 - %f11) / (%f9 - %f11).
	sub.ftz.f32 	%f1488, %f10, %f11;
	mul.ftz.f32 	%f1489, %f1482, %f1488;
	sub.ftz.f32 	%f1490, %f9, %f11;
	div.approx.ftz.f32 	%f1487, %f1489, %f1490;
	.loc	22	174	0
	// Maximum channel (%f9 slot) = s.
	mov.f32 	%f1486, %f1482;
	bra.uni 	$Lt_124_304642;
$Lt_124_304898:
	.loc	22	178	0
	mov.f32 	%f1486, 0f00000000;  	// 0
	mov.f32 	%f1487, 0f00000000;  	// 0
$Lt_124_304642:
$Lt_124_303618:
	// Minimum channel (%f11 slot) = 0.
	mov.f32 	%f1491, 0f00000000;  	// 0
	bra.uni 	$Lt_124_307202;
$Lt_124_303362:
	// %f11 is not the minimum.  %p126 = (%f10 is min), %p127 = (%f11 is max).
	setp.eq.ftz.f32 	%p126, %f1389, %f10;
	setp.eq.ftz.f32 	%p127, %f1387, %f11;
	@!%p127 bra 	$Lt_124_305410;
	@!%p126 bra 	$Lt_124_305922;
	// %f11 is the maximum, %f10 the minimum.
	setp.lt.ftz.f32 	%p128, %f10, %f11;
	@!%p128 bra 	$Lt_124_306434;
	.loc	22	191	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f9 slot) = s * (%f9 - %f10) / (%f11 - %f10).
	sub.ftz.f32 	%f1492, %f9, %f10;
	mul.ftz.f32 	%f1493, %f1482, %f1492;
	sub.ftz.f32 	%f1494, %f11, %f10;
	div.approx.ftz.f32 	%f1486, %f1493, %f1494;
	.loc	22	192	0
	// Maximum channel (%f11 slot) = s.
	mov.f32 	%f1491, %f1482;
	bra.uni 	$Lt_124_306178;
$Lt_124_306434:
	.loc	22	196	0
	mov.f32 	%f1486, 0f00000000;  	// 0
	mov.f32 	%f1491, 0f00000000;  	// 0
$Lt_124_306178:
	// Minimum channel (%f10 slot) = 0.
	mov.f32 	%f1487, 0f00000000;  	// 0
	bra.uni 	$Lt_124_307202;
$Lt_124_305922:
	// %f11 is the maximum and %f10 is not the minimum; %f9 is treated as the minimum.
	setp.lt.ftz.f32 	%p129, %f9, %f11;
	@!%p129 bra 	$Lt_124_306946;
	.loc	22	204	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f10 slot) = s * (%f10 - %f9) / (%f11 - %f9).
	sub.ftz.f32 	%f1495, %f10, %f9;
	mul.ftz.f32 	%f1496, %f1482, %f1495;
	sub.ftz.f32 	%f1497, %f11, %f9;
	div.approx.ftz.f32 	%f1487, %f1496, %f1497;
	.loc	22	205	0
	// Maximum channel (%f11 slot) = s.
	mov.f32 	%f1491, %f1482;
	bra.uni 	$Lt_124_306690;
$Lt_124_306946:
	.loc	22	209	0
	mov.f32 	%f1491, 0f00000000;  	// 0
	mov.f32 	%f1487, 0f00000000;  	// 0
$Lt_124_306690:
	.loc	22	211	0
	// Minimum channel (%f9 slot) = 0.
	mov.f32 	%f1486, 0f00000000;  	// 0
	bra.uni 	$Lt_124_307202;
$Lt_124_305410:
	// %f11 is neither the minimum nor the maximum, i.e. it is the middle channel.
	@!%p126 bra 	$Lt_124_307458;
	// %f10 is the minimum; %f9 is treated as the maximum.
	setp.gt.ftz.f32 	%p130, %f9, %f10;
	@!%p130 bra 	$Lt_124_307970;
	.loc	22	220	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f11 slot) = s * (%f11 - %f10) / (%f9 - %f10).
	sub.ftz.f32 	%f1498, %f11, %f10;
	mul.ftz.f32 	%f1499, %f1482, %f1498;
	sub.ftz.f32 	%f1500, %f9, %f10;
	div.approx.ftz.f32 	%f1491, %f1499, %f1500;
	.loc	22	221	0
	// Maximum channel (%f9 slot) = s.
	mov.f32 	%f1486, %f1482;
	bra.uni 	$Lt_124_307714;
$Lt_124_307970:
	.loc	22	225	0
	mov.f32 	%f1486, 0f00000000;  	// 0
	mov.f32 	%f1491, 0f00000000;  	// 0
$Lt_124_307714:
	// Minimum channel (%f10 slot) = 0.
	mov.f32 	%f1487, 0f00000000;  	// 0
	bra.uni 	$Lt_124_307202;
$Lt_124_307458:
	// Remaining ordering: %f9 is treated as the minimum and %f10 as the maximum.
	// %p109 (%f9 < %f10, computed at the top of the arm) guards the division.
	@!%p109 bra 	$Lt_124_308482;
	.loc	22	233	0
	// s = saturate(max - min) of (%f5,%f6,%f7) -> %f1482.
	setp.lt.ftz.f32 	%p104, %f5, %f6;
	max.ftz.f32 	%f1382, %f5, %f6;
	selp.f32 	%f1383, %f5, %f6, %p104;
	max.ftz.f32 	%f1384, %f1382, %f7;
	setp.lt.ftz.f32 	%p105, %f1383, %f7;
	selp.f32 	%f1385, %f1383, %f7, %p105;
	sub.ftz.f32 	%f1481, %f1384, %f1385;
	cvt.ftz.sat.f32.f32 	%f1482, %f1481;
	// Middle channel (%f11 slot) = s * (%f11 - %f9) / (%f10 - %f9).
	sub.ftz.f32 	%f1501, %f11, %f9;
	mul.ftz.f32 	%f1502, %f1482, %f1501;
	sub.ftz.f32 	%f1503, %f10, %f9;
	div.approx.ftz.f32 	%f1491, %f1502, %f1503;
	.loc	22	234	0
	// Maximum channel (%f10 slot) = s.
	mov.f32 	%f1487, %f1482;
	bra.uni 	$Lt_124_308226;
$Lt_124_308482:
	.loc	22	238	0
	mov.f32 	%f1491, 0f00000000;  	// 0
	mov.f32 	%f1487, 0f00000000;  	// 0
$Lt_124_308226:
	.loc	22	240	0
	// Minimum channel (%f9 slot) = 0.
	mov.f32 	%f1486, 0f00000000;  	// 0
$Lt_124_307202:
$Lt_124_305154:
$Lt_124_303106:
	.loc	22	113	0
	// Weighted sums using the first three floats of kRGB32f_To_601YPbPr
	// (presumably the luma row -- the table itself is not visible here):
	//   %f1510 = saturate(w8*%f9 + w4*%f10 + w0*%f11)
	//   %f1511 = saturate(w8*%f1486 + w4*%f1487 + w0*%f1491)
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1504, %f10, %f471;
	mul.ftz.f32 	%f1505, %f1487, %f471;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1506, %f470, %f11, %f1504;
	fma.rn.ftz.f32 	%f1507, %f470, %f1491, %f1505;
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1508, %f469, %f9, %f1506;
	fma.rn.ftz.f32 	%f1509, %f469, %f1486, %f1507;
	cvt.ftz.sat.f32.f32 	%f1510, %f1508;
	cvt.ftz.sat.f32.f32 	%f1511, %f1509;
	// Shift every rescaled channel by the difference of the two weighted sums.
	sub.ftz.f32 	%f1512, %f1510, %f1511;
	add.ftz.f32 	%f1513, %f1512, %f1486;
	mov.f32 	%f1514, %f1513;
	add.ftz.f32 	%f1515, %f1512, %f1487;
	mov.f32 	%f1516, %f1515;
	add.ftz.f32 	%f1517, %f1512, %f1491;
	mov.f32 	%f1518, %f1517;
	.loc	22	50	0
	// L = saturate(weighted sum of the shifted triple) -> %f1522.
	mul.ftz.f32 	%f1519, %f1515, %f471;
	fma.rn.ftz.f32 	%f1520, %f470, %f1517, %f1519;
	fma.rn.ftz.f32 	%f1521, %f469, %f1513, %f1520;
	cvt.ftz.sat.f32.f32 	%f1522, %f1521;
	.loc	22	116	0
	// n = min of the shifted triple -> %f1524.  If n < 0, pull each channel towards L:
	//   c = L + L * (c - L) / (L - n)
	setp.gt.ftz.f32 	%p131, %f1515, %f1513;
	selp.f32 	%f1523, %f1513, %f1515, %p131;
	setp.lt.ftz.f32 	%p132, %f1523, %f1517;
	selp.f32 	%f1524, %f1523, %f1517, %p132;
	mov.f32 	%f1525, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p133, %f1524, %f1525;
	@!%p133 bra 	$Lt_124_308738;
	.loc	22	119	0
	sub.ftz.f32 	%f1526, %f1522, %f1524;
	sub.ftz.f32 	%f1527, %f1517, %f1522;
	mul.ftz.f32 	%f1528, %f1522, %f1527;
	div.approx.ftz.f32 	%f1529, %f1528, %f1526;
	add.ftz.f32 	%f1518, %f1522, %f1529;
	.loc	22	120	0
	sub.ftz.f32 	%f1530, %f1515, %f1522;
	mul.ftz.f32 	%f1531, %f1522, %f1530;
	div.approx.ftz.f32 	%f1532, %f1531, %f1526;
	add.ftz.f32 	%f1516, %f1522, %f1532;
	.loc	22	121	0
	sub.ftz.f32 	%f1533, %f1513, %f1522;
	mul.ftz.f32 	%f1534, %f1522, %f1533;
	div.approx.ftz.f32 	%f1535, %f1534, %f1526;
	add.ftz.f32 	%f1514, %f1522, %f1535;
$Lt_124_308738:
	// x = max of the shifted triple (taken before the n < 0 adjustment) -> %f1537.
	// If x > 1, pull each (possibly already adjusted) channel towards L:
	//   c = L + (1 - L) * (c - L) / (x - L)
	max.ftz.f32 	%f1536, %f1515, %f1513;
	max.ftz.f32 	%f1537, %f1536, %f1517;
	mov.f32 	%f1538, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p134, %f1537, %f1538;
	@!%p134 bra 	$Lt_124_309250;
	.loc	27	529	0
	mov.f32 	%f1539, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1540, %f1539, %f1522;
	sub.ftz.f32 	%f1541, %f1537, %f1522;
	sub.ftz.f32 	%f1542, %f1518, %f1522;
	mul.ftz.f32 	%f1543, %f1540, %f1542;
	div.approx.ftz.f32 	%f1544, %f1543, %f1541;
	.loc	22	125	0
	add.ftz.f32 	%f1518, %f1544, %f1522;
	.loc	27	529	0
	sub.ftz.f32 	%f1545, %f1516, %f1522;
	mul.ftz.f32 	%f1546, %f1540, %f1545;
	div.approx.ftz.f32 	%f1547, %f1546, %f1541;
	.loc	22	126	0
	add.ftz.f32 	%f1516, %f1547, %f1522;
	.loc	27	529	0
	sub.ftz.f32 	%f1548, %f1514, %f1522;
	mul.ftz.f32 	%f1549, %f1540, %f1548;
	div.approx.ftz.f32 	%f1550, %f1549, %f1541;
	.loc	22	127	0
	add.ftz.f32 	%f1514, %f1550, %f1522;
$Lt_124_309250:
	.loc	22	468	0
	// Alpha composite.  a = inAlphaGain * %f8 (%f135);
	// aOut = a + %f12 - a * %f12 (%f138, copied to %f1553).
	ld.param.f32 	%f1551, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1551, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1552, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1552;
	mov.f32 	%f1553, %f138;
	// If aOut - 8e-6 <= 0 the result is fully zero (colour and alpha).
	mov.f32 	%f1554, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1555, %f138, %f1554;
	mov.f32 	%f1556, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p135, %f1555, %f1556;
	@!%p135 bra 	$Lt_124_310018;
	mov.f32 	%f1557, 0f00000000;  	// 0
	mov.f32 	%f1558, 0f00000000;  	// 0
	mov.f32 	%f1559, 0f00000000;  	// 0
	mov.f32 	%f1553, 0f00000000;  	// 0
	bra.uni 	$Lt_124_309762;
$Lt_124_310018:
	// k = a / aOut (%f149), 1 - k (%f152).  Per channel, with c the clipped blend value:
	//   out = other * (1 - k) + k * (c * (1 - %f12) + c * %f12)
	// where "other" is the matching channel of (%f9,%f10,%f11).
	mov.f32 	%f1560, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1560, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1561, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1562, %f135, %f148;
	sub.ftz.f32 	%f152, %f1561, %f1562;
	mul.ftz.f32 	%f1563, %f147, %f1514;
	fma.rn.ftz.f32 	%f1564, %f1514, %f12, %f1563;
	mul.ftz.f32 	%f1565, %f149, %f1564;
	fma.rn.ftz.f32 	%f1559, %f9, %f152, %f1565;
	mul.ftz.f32 	%f1566, %f147, %f1516;
	fma.rn.ftz.f32 	%f1567, %f1516, %f12, %f1566;
	mul.ftz.f32 	%f1568, %f149, %f1567;
	fma.rn.ftz.f32 	%f1558, %f10, %f152, %f1568;
	mul.ftz.f32 	%f1569, %f147, %f1518;
	fma.rn.ftz.f32 	%f1570, %f1518, %f12, %f1569;
	mul.ftz.f32 	%f1571, %f149, %f1570;
	fma.rn.ftz.f32 	%f1557, %f11, %f152, %f1571;
$Lt_124_309762:
	.loc	6	214	0
	// Publish the result in %f5..%f8 and jump to the shared store code.
	mov.f32 	%f5, %f1559;
	mov.f32 	%f6, %f1558;
	mov.f32 	%f7, %f1557;
	mov.f32 	%f8, %f1553;
	bra.uni 	$Lt_124_313346;
// Blend arm $Lt_124_6658 (source lines 22:113-127, 22:468; result at 6:215).
// Keeps the (%f5,%f6,%f7) triple but shifts it so that its weighted sum matches the
// weighted sum of the (%f9,%f10,%f11) triple, clips the result back into [0,1], then
// composites it with (%f9,%f10,%f11) using %f8 / %f12 as alphas and writes %f5..%f8.
// All of those input registers are defined outside this view.
// NOTE(review): the shift + clip has the same shape as SetLum/ClipColor from the
// PDF / W3C non-separable blend modes -- confirm against the CUDA source.
$Lt_124_6658:
	.loc	22	113	0
	// Weighted sums using the first three floats of kRGB32f_To_601YPbPr
	// (presumably the luma row -- the table itself is not visible here):
	//   %f1578 = saturate(w8*%f5 + w4*%f6  + w0*%f7)
	//   %f1579 = saturate(w8*%f9 + w4*%f10 + w0*%f11)
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1572, %f6, %f471;
	mul.ftz.f32 	%f1573, %f10, %f471;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1574, %f470, %f7, %f1572;
	fma.rn.ftz.f32 	%f1575, %f470, %f11, %f1573;
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1576, %f469, %f5, %f1574;
	fma.rn.ftz.f32 	%f1577, %f469, %f9, %f1575;
	cvt.ftz.sat.f32.f32 	%f1578, %f1576;
	cvt.ftz.sat.f32.f32 	%f1579, %f1577;
	// d = %f1579 - %f1578; add d to each of %f5, %f6, %f7.
	sub.ftz.f32 	%f1580, %f1579, %f1578;
	add.ftz.f32 	%f1581, %f1580, %f5;
	mov.f32 	%f1582, %f1581;
	add.ftz.f32 	%f1583, %f1580, %f6;
	mov.f32 	%f1584, %f1583;
	add.ftz.f32 	%f1585, %f1580, %f7;
	mov.f32 	%f1586, %f1585;
	.loc	22	50	0
	// L = saturate(weighted sum of the shifted triple) -> %f1590.
	mul.ftz.f32 	%f1587, %f1583, %f471;
	fma.rn.ftz.f32 	%f1588, %f470, %f1585, %f1587;
	fma.rn.ftz.f32 	%f1589, %f469, %f1581, %f1588;
	cvt.ftz.sat.f32.f32 	%f1590, %f1589;
	.loc	22	116	0
	// n = min of the shifted triple -> %f1592.  If n < 0, pull each channel towards L:
	//   c = L + L * (c - L) / (L - n)
	setp.gt.ftz.f32 	%p136, %f1583, %f1581;
	selp.f32 	%f1591, %f1581, %f1583, %p136;
	setp.lt.ftz.f32 	%p137, %f1591, %f1585;
	selp.f32 	%f1592, %f1591, %f1585, %p137;
	mov.f32 	%f1593, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p138, %f1592, %f1593;
	@!%p138 bra 	$Lt_124_310274;
	.loc	22	119	0
	sub.ftz.f32 	%f1594, %f1590, %f1592;
	sub.ftz.f32 	%f1595, %f1585, %f1590;
	mul.ftz.f32 	%f1596, %f1590, %f1595;
	div.approx.ftz.f32 	%f1597, %f1596, %f1594;
	add.ftz.f32 	%f1586, %f1590, %f1597;
	.loc	22	120	0
	sub.ftz.f32 	%f1598, %f1583, %f1590;
	mul.ftz.f32 	%f1599, %f1590, %f1598;
	div.approx.ftz.f32 	%f1600, %f1599, %f1594;
	add.ftz.f32 	%f1584, %f1590, %f1600;
	.loc	22	121	0
	sub.ftz.f32 	%f1601, %f1581, %f1590;
	mul.ftz.f32 	%f1602, %f1590, %f1601;
	div.approx.ftz.f32 	%f1603, %f1602, %f1594;
	add.ftz.f32 	%f1582, %f1590, %f1603;
$Lt_124_310274:
	// x = max of the shifted triple (taken before the n < 0 adjustment) -> %f1605.
	// If x > 1, pull each (possibly already adjusted) channel towards L:
	//   c = L + (1 - L) * (c - L) / (x - L)
	max.ftz.f32 	%f1604, %f1583, %f1581;
	max.ftz.f32 	%f1605, %f1604, %f1585;
	mov.f32 	%f1606, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p139, %f1605, %f1606;
	@!%p139 bra 	$Lt_124_310786;
	.loc	27	529	0
	mov.f32 	%f1607, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1608, %f1607, %f1590;
	sub.ftz.f32 	%f1609, %f1605, %f1590;
	sub.ftz.f32 	%f1610, %f1586, %f1590;
	mul.ftz.f32 	%f1611, %f1608, %f1610;
	div.approx.ftz.f32 	%f1612, %f1611, %f1609;
	.loc	22	125	0
	add.ftz.f32 	%f1586, %f1612, %f1590;
	.loc	27	529	0
	sub.ftz.f32 	%f1613, %f1584, %f1590;
	mul.ftz.f32 	%f1614, %f1608, %f1613;
	div.approx.ftz.f32 	%f1615, %f1614, %f1609;
	.loc	22	126	0
	add.ftz.f32 	%f1584, %f1615, %f1590;
	.loc	27	529	0
	sub.ftz.f32 	%f1616, %f1582, %f1590;
	mul.ftz.f32 	%f1617, %f1608, %f1616;
	div.approx.ftz.f32 	%f1618, %f1617, %f1609;
	.loc	22	127	0
	add.ftz.f32 	%f1582, %f1618, %f1590;
$Lt_124_310786:
	.loc	22	468	0
	// Alpha composite.  a = inAlphaGain * %f8 (%f135);
	// aOut = a + %f12 - a * %f12 (%f138, copied to %f1621).
	ld.param.f32 	%f1619, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1619, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1620, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1620;
	mov.f32 	%f1621, %f138;
	// If aOut - 8e-6 <= 0 the result is fully zero (colour and alpha).
	mov.f32 	%f1622, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1623, %f138, %f1622;
	mov.f32 	%f1624, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p140, %f1623, %f1624;
	@!%p140 bra 	$Lt_124_311554;
	mov.f32 	%f1625, 0f00000000;  	// 0
	mov.f32 	%f1626, 0f00000000;  	// 0
	mov.f32 	%f1627, 0f00000000;  	// 0
	mov.f32 	%f1621, 0f00000000;  	// 0
	bra.uni 	$Lt_124_311298;
$Lt_124_311554:
	// k = a / aOut (%f149), 1 - k (%f152).  Per channel, with c the clipped blend value:
	//   out = other * (1 - k) + k * (c * (1 - %f12) + c * %f12)
	// where "other" is the matching channel of (%f9,%f10,%f11).
	mov.f32 	%f1628, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1628, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1629, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1630, %f135, %f148;
	sub.ftz.f32 	%f152, %f1629, %f1630;
	mul.ftz.f32 	%f1631, %f147, %f1582;
	fma.rn.ftz.f32 	%f1632, %f1582, %f12, %f1631;
	mul.ftz.f32 	%f1633, %f149, %f1632;
	fma.rn.ftz.f32 	%f1627, %f9, %f152, %f1633;
	mul.ftz.f32 	%f1634, %f147, %f1584;
	fma.rn.ftz.f32 	%f1635, %f1584, %f12, %f1634;
	mul.ftz.f32 	%f1636, %f149, %f1635;
	fma.rn.ftz.f32 	%f1626, %f10, %f152, %f1636;
	mul.ftz.f32 	%f1637, %f147, %f1586;
	fma.rn.ftz.f32 	%f1638, %f1586, %f12, %f1637;
	mul.ftz.f32 	%f1639, %f149, %f1638;
	fma.rn.ftz.f32 	%f1625, %f11, %f152, %f1639;
$Lt_124_311298:
	.loc	6	215	0
	// Publish the result in %f5..%f8 and jump to the shared store code.
	mov.f32 	%f5, %f1627;
	mov.f32 	%f6, %f1626;
	mov.f32 	%f7, %f1625;
	mov.f32 	%f8, %f1621;
	bra.uni 	$Lt_124_313346;
// Blend arm $Lt_124_6914 (source lines 22:113-127, 22:468; result at 6:216).
// Mirror of the preceding arm: keeps the (%f9,%f10,%f11) triple but shifts it so that
// its weighted sum matches the weighted sum of the (%f5,%f6,%f7) triple, clips the
// result back into [0,1], then composites it with (%f9,%f10,%f11) using %f8 / %f12 as
// alphas and writes %f5..%f8.  All of those input registers are defined outside this view.
// NOTE(review): the shift + clip has the same shape as SetLum/ClipColor from the
// PDF / W3C non-separable blend modes -- confirm against the CUDA source.
$Lt_124_6914:
	.loc	22	113	0
	// Weighted sums using the first three floats of kRGB32f_To_601YPbPr
	// (presumably the luma row -- the table itself is not visible here):
	//   %f1646 = saturate(w8*%f5 + w4*%f6  + w0*%f7)
	//   %f1647 = saturate(w8*%f9 + w4*%f10 + w0*%f11)
	ld.const.f32 	%f471, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1640, %f6, %f471;
	mul.ftz.f32 	%f1641, %f10, %f471;
	ld.const.f32 	%f470, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1642, %f470, %f7, %f1640;
	fma.rn.ftz.f32 	%f1643, %f470, %f11, %f1641;
	ld.const.f32 	%f469, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1644, %f469, %f5, %f1642;
	fma.rn.ftz.f32 	%f1645, %f469, %f9, %f1643;
	cvt.ftz.sat.f32.f32 	%f1646, %f1644;
	cvt.ftz.sat.f32.f32 	%f1647, %f1645;
	// d = %f1646 - %f1647; add d to each of %f9, %f10, %f11.
	sub.ftz.f32 	%f1648, %f1646, %f1647;
	add.ftz.f32 	%f1649, %f1648, %f9;
	mov.f32 	%f1650, %f1649;
	add.ftz.f32 	%f1651, %f1648, %f10;
	mov.f32 	%f1652, %f1651;
	add.ftz.f32 	%f1653, %f1648, %f11;
	mov.f32 	%f1654, %f1653;
	.loc	22	50	0
	// L = saturate(weighted sum of the shifted triple) -> %f1658.
	mul.ftz.f32 	%f1655, %f1651, %f471;
	fma.rn.ftz.f32 	%f1656, %f470, %f1653, %f1655;
	fma.rn.ftz.f32 	%f1657, %f469, %f1649, %f1656;
	cvt.ftz.sat.f32.f32 	%f1658, %f1657;
	.loc	22	116	0
	// n = min of the shifted triple -> %f1660.  If n < 0, pull each channel towards L:
	//   c = L + L * (c - L) / (L - n)
	setp.gt.ftz.f32 	%p141, %f1651, %f1649;
	selp.f32 	%f1659, %f1649, %f1651, %p141;
	setp.lt.ftz.f32 	%p142, %f1659, %f1653;
	selp.f32 	%f1660, %f1659, %f1653, %p142;
	mov.f32 	%f1661, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p143, %f1660, %f1661;
	@!%p143 bra 	$Lt_124_311810;
	.loc	22	119	0
	sub.ftz.f32 	%f1662, %f1658, %f1660;
	sub.ftz.f32 	%f1663, %f1653, %f1658;
	mul.ftz.f32 	%f1664, %f1658, %f1663;
	div.approx.ftz.f32 	%f1665, %f1664, %f1662;
	add.ftz.f32 	%f1654, %f1658, %f1665;
	.loc	22	120	0
	sub.ftz.f32 	%f1666, %f1651, %f1658;
	mul.ftz.f32 	%f1667, %f1658, %f1666;
	div.approx.ftz.f32 	%f1668, %f1667, %f1662;
	add.ftz.f32 	%f1652, %f1658, %f1668;
	.loc	22	121	0
	sub.ftz.f32 	%f1669, %f1649, %f1658;
	mul.ftz.f32 	%f1670, %f1658, %f1669;
	div.approx.ftz.f32 	%f1671, %f1670, %f1662;
	add.ftz.f32 	%f1650, %f1658, %f1671;
$Lt_124_311810:
	// x = max of the shifted triple (taken before the n < 0 adjustment) -> %f1673.
	// If x > 1, pull each (possibly already adjusted) channel towards L:
	//   c = L + (1 - L) * (c - L) / (x - L)
	max.ftz.f32 	%f1672, %f1651, %f1649;
	max.ftz.f32 	%f1673, %f1672, %f1653;
	mov.f32 	%f1674, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p144, %f1673, %f1674;
	@!%p144 bra 	$Lt_124_312322;
	.loc	27	529	0
	mov.f32 	%f1675, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1676, %f1675, %f1658;
	sub.ftz.f32 	%f1677, %f1673, %f1658;
	sub.ftz.f32 	%f1678, %f1654, %f1658;
	mul.ftz.f32 	%f1679, %f1676, %f1678;
	div.approx.ftz.f32 	%f1680, %f1679, %f1677;
	.loc	22	125	0
	add.ftz.f32 	%f1654, %f1680, %f1658;
	.loc	27	529	0
	sub.ftz.f32 	%f1681, %f1652, %f1658;
	mul.ftz.f32 	%f1682, %f1676, %f1681;
	div.approx.ftz.f32 	%f1683, %f1682, %f1677;
	.loc	22	126	0
	add.ftz.f32 	%f1652, %f1683, %f1658;
	.loc	27	529	0
	sub.ftz.f32 	%f1684, %f1650, %f1658;
	mul.ftz.f32 	%f1685, %f1676, %f1684;
	div.approx.ftz.f32 	%f1686, %f1685, %f1677;
	.loc	22	127	0
	add.ftz.f32 	%f1650, %f1686, %f1658;
$Lt_124_312322:
	.loc	22	468	0
	// Alpha composite.  a = inAlphaGain * %f8 (%f135);
	// aOut = a + %f12 - a * %f12 (%f138, copied to %f1689).
	ld.param.f32 	%f1687, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f135, %f1687, %f8;
	add.ftz.f32 	%f136, %f135, %f12;
	mul.ftz.f32 	%f1688, %f135, %f12;
	sub.ftz.f32 	%f138, %f136, %f1688;
	mov.f32 	%f1689, %f138;
	// If aOut - 8e-6 <= 0 the result is fully zero (colour and alpha).
	mov.f32 	%f1690, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1691, %f138, %f1690;
	mov.f32 	%f1692, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p145, %f1691, %f1692;
	@!%p145 bra 	$Lt_124_313090;
	mov.f32 	%f1693, 0f00000000;  	// 0
	mov.f32 	%f1694, 0f00000000;  	// 0
	mov.f32 	%f1695, 0f00000000;  	// 0
	mov.f32 	%f1689, 0f00000000;  	// 0
	bra.uni 	$Lt_124_312834;
$Lt_124_313090:
	// k = a / aOut (%f149), 1 - k (%f152).  Per channel, with c the clipped blend value:
	//   out = other * (1 - k) + k * (c * (1 - %f12) + c * %f12)
	// where "other" is the matching channel of (%f9,%f10,%f11).
	mov.f32 	%f1696, 0f3f800000;  	// 1
	sub.ftz.f32 	%f147, %f1696, %f12;
	rcp.approx.ftz.f32 	%f148, %f138;
	mul.ftz.f32 	%f149, %f148, %f135;
	mov.f32 	%f1697, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1698, %f135, %f148;
	sub.ftz.f32 	%f152, %f1697, %f1698;
	mul.ftz.f32 	%f1699, %f147, %f1650;
	fma.rn.ftz.f32 	%f1700, %f1650, %f12, %f1699;
	mul.ftz.f32 	%f1701, %f149, %f1700;
	fma.rn.ftz.f32 	%f1695, %f9, %f152, %f1701;
	mul.ftz.f32 	%f1702, %f147, %f1652;
	fma.rn.ftz.f32 	%f1703, %f1652, %f12, %f1702;
	mul.ftz.f32 	%f1704, %f149, %f1703;
	fma.rn.ftz.f32 	%f1694, %f10, %f152, %f1704;
	mul.ftz.f32 	%f1705, %f147, %f1654;
	fma.rn.ftz.f32 	%f1706, %f1654, %f12, %f1705;
	mul.ftz.f32 	%f1707, %f149, %f1706;
	fma.rn.ftz.f32 	%f1693, %f11, %f152, %f1707;
$Lt_124_312834:
	.loc	6	216	0
	// Publish the result in %f5..%f8 and jump to the shared store code.
	mov.f32 	%f5, %f1695;
	mov.f32 	%f6, %f1694;
	mov.f32 	%f7, %f1693;
	mov.f32 	%f8, %f1689;
	bra.uni 	$Lt_124_313346;
// Alpha-only path (source lines 6:218-243).  The colour registers %f5..%f7 are not
// touched here.  When %p8 is set, %f8 is clamped to [0,1] and scaled by inAlphaGain;
// otherwise control skips straight to the store code.
// NOTE(review): %p8 and the branch that reaches this label are set up outside this
// view -- confirm which condition they encode.
$Lt_124_269058:
	.loc	6	218	0
	@!%p8 bra 	$Lt_124_313346;
	.loc	6	226	0
	// %f1708 = saturate(%f8)
	cvt.ftz.sat.f32.f32 	%f1708, %f8;
	.loc	6	243	0
	// %f8 = inAlphaGain * saturate(%f8)
	ld.param.f32 	%f1709, [__cudaparm_TranslateKernel_inAlphaGain];
	mul.ftz.f32 	%f8, %f1709, %f1708;
// Common join point of the blend arms: store the four result values %f5..%f8 and exit.
// %p7 selects the element format: set -> four f16 values (8 bytes per element),
// clear -> four f32 values (16 bytes per element).
// NOTE(review): %rd8 looks like the destination base address and %rd7 the element
// index; both are computed outside this view -- confirm.
$Lt_124_313346:
$Lt_124_268802:
	@!%p7 bra 	$Lt_124_314114;
	.loc	21	126	0
	// Half-float store: address = %rd8 + %rd7 * 8.
	mul.lo.u64 	%rd13, %rd7, 8;
	add.u64 	%rd14, %rd8, %rd13;
	// Convert each f32 to f16 (round-to-nearest, flush-to-zero) through a scoped
	// temporary %b1, then store the four 16-bit values with one vector store.
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f5;
	mov.b32		%r117, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f6;
	mov.b32		%r118, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r119, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f8;
	mov.b32		%r120, %b1; }
	st.global.v4.u16 	[%rd14+0], {%r117,%r118,%r119,%r120};
	.loc	6	246	0
	bra.uni 	$Lt_124_315394;
$Lt_124_314114:
	.loc	21	126	0
	// Full-float store: address = %rd8 + %rd7 * 16.
	mul.lo.u64 	%rd15, %rd7, 16;
	add.u64 	%rd16, %rd8, %rd15;
	st.global.v4.f32 	[%rd16+0], {%f5,%f6,%f7,%f8};
$Lt_124_315394:
$L_124_266754:
	.loc	6	381	0
	exit;
$LDWend_TranslateKernel:
	} // TranslateKernel

	.entry cuda_motion_renderquad (
		.param .u64 __cudaparm_cuda_motion_renderquad_dstFrame,
		.param .u32 __cudaparm_cuda_motion_renderquad_inDeviceFormat,
		.param .s32 __cudaparm_cuda_motion_renderquad_width,
		.param .s32 __cudaparm_cuda_motion_renderquad_height,
		.param .f32 __cudaparm_cuda_motion_renderquad_tWidth,
		.param .f32 __cudaparm_cuda_motion_renderquad_tHeight,
		.param .s32 __cudaparm_cuda_motion_renderquad_srcPitch,
		.param .s32 __cudaparm_cuda_motion_renderquad_dstPitch,
		.param .u32 __cudaparm_cuda_motion_renderquad_blendMode,
		.param .s8 __cudaparm_cuda_motion_renderquad_inDoCompositeOver,
		.param .f32 __cudaparm_cuda_motion_renderquad_alphaGain,
		.param .align 8 .b8 __cudaparm_cuda_motion_renderquad___val_paramquad[120])
	{
	.reg .u32 %r<121>;
	.reg .u64 %rd<18>;
	.reg .f32 %f<2199>;
	.reg .pred %p<150>;
	.loc	6	502	0
$LDWbegin_cuda_motion_renderquad:
	.loc	6	505	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	ld.param.s32 	%r11, [__cudaparm_cuda_motion_renderquad_width];
	ld.param.s32 	%r12, [__cudaparm_cuda_motion_renderquad_height];
	set.le.u32.s32 	%r13, %r12, %r10;
	neg.s32 	%r14, %r13;
	set.le.u32.s32 	%r15, %r11, %r8;
	neg.s32 	%r16, %r15;
	or.b32 	%r17, %r14, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_125_267266;
	bra.uni 	$LBB381_cuda_motion_renderquad;
$Lt_125_267266:
	// Coverage of this pixel: product of four per-edge factors, left in %f65.
	// Pixel centre: (%f4, %f6) = (x + 0.5, y + 0.5).
	// For k = 0..3, with all values read from the __val_paramquad block:
	//   Px = quad[48+8k]  Py = quad[52+8k]  c0 = quad[80+8k]  c1 = quad[84+8k]
	//   factor_k = 1 - sat( c0*(Py - py) - c1*(Px - px) + 0.5 )
	// NOTE(review): this looks like an anti-aliased edge test against the four
	// sides of the quad — confirm against the source at file 6, lines 513-515.
	.loc	6	513	0
	cvt.rn.f32.s32 	%f1, %r8;
	cvt.rn.f32.s32 	%f2, %r10;
	mov.f32 	%f3, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f4, %f1, %f3;
	mov.f32 	%f5, 0f3f000000;     	// 0.5
	add.ftz.f32 	%f6, %f2, %f5;
	// k = 0: offsets 48/52 and 80/84 -> %f20
	mov.f32 	%f7, 0f3f800000;     	// 1
	ld.param.f32 	%f8, [__cudaparm_cuda_motion_renderquad___val_paramquad+84];
	ld.param.f32 	%f9, [__cudaparm_cuda_motion_renderquad___val_paramquad+48];
	sub.ftz.f32 	%f10, %f9, %f4;
	mul.ftz.f32 	%f11, %f8, %f10;
	ld.param.f32 	%f12, [__cudaparm_cuda_motion_renderquad___val_paramquad+80];
	ld.param.f32 	%f13, [__cudaparm_cuda_motion_renderquad___val_paramquad+52];
	sub.ftz.f32 	%f14, %f13, %f6;
	mul.ftz.f32 	%f15, %f12, %f14;
	sub.ftz.f32 	%f16, %f15, %f11;
	mov.f32 	%f17, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f18, %f16, %f17;
	cvt.ftz.sat.f32.f32 	%f19, %f18;
	sub.ftz.f32 	%f20, %f7, %f19;
	// k = 1: offsets 56/60 and 88/92 -> %f34; %f35 = factor_0 * factor_1
	mov.f32 	%f21, 0f3f800000;    	// 1
	ld.param.f32 	%f22, [__cudaparm_cuda_motion_renderquad___val_paramquad+92];
	ld.param.f32 	%f23, [__cudaparm_cuda_motion_renderquad___val_paramquad+56];
	sub.ftz.f32 	%f24, %f23, %f4;
	mul.ftz.f32 	%f25, %f22, %f24;
	ld.param.f32 	%f26, [__cudaparm_cuda_motion_renderquad___val_paramquad+88];
	ld.param.f32 	%f27, [__cudaparm_cuda_motion_renderquad___val_paramquad+60];
	sub.ftz.f32 	%f28, %f27, %f6;
	mul.ftz.f32 	%f29, %f26, %f28;
	sub.ftz.f32 	%f30, %f29, %f25;
	mov.f32 	%f31, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f32, %f30, %f31;
	cvt.ftz.sat.f32.f32 	%f33, %f32;
	sub.ftz.f32 	%f34, %f21, %f33;
	mul.ftz.f32 	%f35, %f20, %f34;
	// k = 2: offsets 64/68 and 96/100 -> %f49; %f50 = running product
	.loc	6	514	0
	mov.f32 	%f36, 0f3f800000;    	// 1
	ld.param.f32 	%f37, [__cudaparm_cuda_motion_renderquad___val_paramquad+100];
	ld.param.f32 	%f38, [__cudaparm_cuda_motion_renderquad___val_paramquad+64];
	sub.ftz.f32 	%f39, %f38, %f4;
	mul.ftz.f32 	%f40, %f37, %f39;
	ld.param.f32 	%f41, [__cudaparm_cuda_motion_renderquad___val_paramquad+96];
	ld.param.f32 	%f42, [__cudaparm_cuda_motion_renderquad___val_paramquad+68];
	sub.ftz.f32 	%f43, %f42, %f6;
	mul.ftz.f32 	%f44, %f41, %f43;
	sub.ftz.f32 	%f45, %f44, %f40;
	mov.f32 	%f46, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f47, %f45, %f46;
	cvt.ftz.sat.f32.f32 	%f48, %f47;
	sub.ftz.f32 	%f49, %f36, %f48;
	mul.ftz.f32 	%f50, %f35, %f49;
	// k = 3: offsets 72/76 and 104/108 -> %f64; %f65 = final coverage
	.loc	6	515	0
	mov.f32 	%f51, 0f3f800000;    	// 1
	ld.param.f32 	%f52, [__cudaparm_cuda_motion_renderquad___val_paramquad+108];
	ld.param.f32 	%f53, [__cudaparm_cuda_motion_renderquad___val_paramquad+72];
	sub.ftz.f32 	%f54, %f53, %f4;
	mul.ftz.f32 	%f55, %f52, %f54;
	ld.param.f32 	%f56, [__cudaparm_cuda_motion_renderquad___val_paramquad+104];
	ld.param.f32 	%f57, [__cudaparm_cuda_motion_renderquad___val_paramquad+76];
	sub.ftz.f32 	%f58, %f57, %f6;
	mul.ftz.f32 	%f59, %f56, %f58;
	sub.ftz.f32 	%f60, %f59, %f55;
	mov.f32 	%f61, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f62, %f60, %f61;
	cvt.ftz.sat.f32.f32 	%f63, %f62;
	sub.ftz.f32 	%f64, %f51, %f63;
	mul.ftz.f32 	%f65, %f50, %f64;
	// %r19 = inDoCompositeOver, read as a signed byte; reused by later tests.
	ld.param.s8 	%r19, [__cudaparm_cuda_motion_renderquad_inDoCompositeOver];
	// Coverage >= 1e-37: go shade the pixel.
	mov.f32 	%f66, 0f02081cea;    	// 1e-037
	setp.lt.ftz.f32 	%p2, %f65, %f66;
	@!%p2 bra 	$Lt_125_267778;
	// Negligible coverage: when compositing over the destination there is
	// nothing to write, so leave; otherwise fall through to the code that
	// clears the destination pixel.
	mov.u32 	%r20, 0;
	setp.ne.s32 	%p3, %r19, %r20;
	@%p3 bra 	$LBB381_cuda_motion_renderquad;
	// Uncovered pixel, not compositing: write four zero components to the
	// destination.  Element index %rd1 = dstPitch * y + x (sign-extended to
	// 64 bits); the index is scaled by the pixel size below, so dstPitch is
	// used here in pixels rather than bytes.
	ld.param.s32 	%r21, [__cudaparm_cuda_motion_renderquad_dstPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_cuda_motion_renderquad_dstFrame];
	// inDeviceFormat == 0 selects the 8-byte (4 x 16-bit) pixel layout,
	// any other value the 16-byte (4 x f32) layout.
	ld.param.u32 	%r24, [__cudaparm_cuda_motion_renderquad_inDeviceFormat];
	mov.u32 	%r25, 0;
	setp.ne.s32 	%p4, %r24, %r25;
	@%p4 bra 	$Lt_125_269058;
	.loc	21	126	0
	// 16-bit path: convert 0.0f to f16 once and store it in all four lanes.
	mov.f32 	%f67, 0f00000000;    	// 0
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f67;
	mov.b32		%r26, %b1; }
	mov.s32 	%r27, %r26;
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	st.global.v4.u16 	[%rd4+0], {%r27,%r27,%r27,%r27};
	.loc	6	521	0
	bra.uni 	$LBB381_cuda_motion_renderquad;
$Lt_125_269058:
	.loc	21	126	0
	// f32 path: 16 bytes per pixel, store four 0.0f values.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	mov.f32 	%f68, 0f00000000;    	// 0
	mov.f32 	%f69, 0f00000000;    	// 0
	mov.f32 	%f70, 0f00000000;    	// 0
	mov.f32 	%f71, 0f00000000;    	// 0
	st.global.v4.f32 	[%rd6+0], {%f68,%f69,%f70,%f71};
	bra.uni 	$LBB381_cuda_motion_renderquad;
$Lt_125_267778:
	// Covered pixel: map the pixel centre (%f4,%f6) to texture coordinates and
	// start a 4x4 weighted texture sum.
	//
	// Vectors used below (all components read from __val_paramquad):
	//   a = quad[0,4,8]     b = quad[12,16,20]   c = quad[24,28,32]
	//   e = quad[36,40,44]  t = e - a
	//   d = normalize(px - e.x, py - e.y, -e.z)   (rsqrt.approx)
	// NOTE(review): this has the shape of a ray/parallelogram intersection
	// (origin e, direction d, corner a, edges b and c) — confirm with the
	// CUDA source at file 6, lines 90-98 and 442.
	.loc	6	90	0
	ld.param.f32 	%f72, [__cudaparm_cuda_motion_renderquad___val_paramquad+44];
	neg.ftz.f32 	%f73, %f72;
	ld.param.f32 	%f74, [__cudaparm_cuda_motion_renderquad___val_paramquad+36];
	sub.ftz.f32 	%f75, %f4, %f74;
	ld.param.f32 	%f76, [__cudaparm_cuda_motion_renderquad___val_paramquad+40];
	sub.ftz.f32 	%f77, %f6, %f76;
	mul.ftz.f32 	%f78, %f77, %f77;
	fma.rn.ftz.f32 	%f79, %f75, %f75, %f78;
	fma.rn.ftz.f32 	%f80, %f73, %f73, %f79;
	// %f81 = 1/|.|;  %f82 = -d.z,  %f83 = d.y  (d.x = %f88, d.z = %f89 below)
	rsqrt.approx.ftz.f32 	%f81, %f80;
	mul.ftz.f32 	%f82, %f81, %f72;
	mul.ftz.f32 	%f83, %f77, %f81;
	ld.param.f32 	%f84, [__cudaparm_cuda_motion_renderquad___val_paramquad+32];
	ld.param.f32 	%f85, [__cudaparm_cuda_motion_renderquad___val_paramquad+28];
	// %f87 = (d x c).x = d.y*c.z - d.z*c.y
	mul.ftz.f32 	%f86, %f82, %f85;
	fma.rn.ftz.f32 	%f87, %f83, %f84, %f86;
	.loc	6	91	0
	// %f93 = (d x c).y = d.z*c.x - d.x*c.z
	mul.ftz.f32 	%f88, %f75, %f81;
	neg.ftz.f32 	%f89, %f82;
	ld.param.f32 	%f90, [__cudaparm_cuda_motion_renderquad___val_paramquad+24];
	mul.ftz.f32 	%f91, %f88, %f84;
	mul.ftz.f32 	%f92, %f89, %f90;
	sub.ftz.f32 	%f93, %f92, %f91;
	.loc	6	92	0
	// %f96 = (d x c).z = d.x*c.y - d.y*c.x
	mul.ftz.f32 	%f94, %f83, %f90;
	mul.ftz.f32 	%f95, %f88, %f85;
	sub.ftz.f32 	%f96, %f95, %f94;
	.loc	6	98	0
	// t = (%f98, %f102, %f100);  b = (%f104, %f103, %f105)
	ld.param.f32 	%f97, [__cudaparm_cuda_motion_renderquad___val_paramquad+0];
	sub.ftz.f32 	%f98, %f74, %f97;
	ld.param.f32 	%f99, [__cudaparm_cuda_motion_renderquad___val_paramquad+8];
	sub.ftz.f32 	%f100, %f72, %f99;
	ld.param.f32 	%f101, [__cudaparm_cuda_motion_renderquad___val_paramquad+4];
	sub.ftz.f32 	%f102, %f76, %f101;
	ld.param.f32 	%f103, [__cudaparm_cuda_motion_renderquad___val_paramquad+16];
	ld.param.f32 	%f104, [__cudaparm_cuda_motion_renderquad___val_paramquad+12];
	ld.param.f32 	%f105, [__cudaparm_cuda_motion_renderquad___val_paramquad+20];
	// %f117 = d . (t x b), accumulated one component of (t x b) at a time.
	mul.ftz.f32 	%f106, %f98, %f105;
	mul.ftz.f32 	%f107, %f100, %f104;
	sub.ftz.f32 	%f108, %f107, %f106;
	mul.ftz.f32 	%f109, %f83, %f108;
	mul.ftz.f32 	%f110, %f100, %f103;
	mul.ftz.f32 	%f111, %f102, %f105;
	sub.ftz.f32 	%f112, %f111, %f110;
	fma.rn.ftz.f32 	%f113, %f112, %f88, %f109;
	mul.ftz.f32 	%f114, %f102, %f104;
	mul.ftz.f32 	%f115, %f98, %f103;
	sub.ftz.f32 	%f116, %f115, %f114;
	fma.rn.ftz.f32 	%f117, %f116, %f89, %f113;
	.loc	6	442	0
	// %f122 = (d x c) . b   (common denominator)
	// %f123 = (d x c) . t
	// u = %f125 = %f123 / %f122,  v = %f126 = %f117 / %f122  (rcp.approx)
	mul.ftz.f32 	%f118, %f93, %f103;
	mul.ftz.f32 	%f119, %f102, %f93;
	fma.rn.ftz.f32 	%f120, %f87, %f104, %f118;
	fma.rn.ftz.f32 	%f121, %f87, %f98, %f119;
	fma.rn.ftz.f32 	%f122, %f96, %f105, %f120;
	fma.rn.ftz.f32 	%f123, %f96, %f100, %f121;
	rcp.approx.ftz.f32 	%f124, %f122;
	mul.ftz.f32 	%f125, %f123, %f124;
	mul.ftz.f32 	%f126, %f117, %f124;
	// Scale to texels and shift by -0.5:
	//   sx = %f132 = tWidth*u - 0.5,  sy = %f134 = tHeight*v - 0.5
	ld.param.f32 	%f127, [__cudaparm_cuda_motion_renderquad_tWidth];
	mul.ftz.f32 	%f128, %f127, %f125;
	ld.param.f32 	%f129, [__cudaparm_cuda_motion_renderquad_tHeight];
	mul.ftz.f32 	%f130, %f129, %f126;
	mov.f32 	%f131, 0fbf000000;   	// -0.5
	add.ftz.f32 	%f132, %f128, %f131;
	mov.f32 	%f133, 0fbf000000;   	// -0.5
	add.ftz.f32 	%f134, %f130, %f133;
	// cvt.rmi rounds toward -inf: %f135 = floor(sx), %f136 = floor(sy).
	cvt.rmi.ftz.f32.f32 	%f135, %f132;
	cvt.rmi.ftz.f32.f32 	%f136, %f134;
	// Base tap coordinate (%f138, %f140) = (floor(sx) - 0.5, floor(sy) - 0.5).
	mov.f32 	%f137, 0fbf000000;   	// -0.5
	add.ftz.f32 	%f138, %f135, %f137;
	mov.f32 	%f139, 0fbf000000;   	// -0.5
	add.ftz.f32 	%f140, %f136, %f139;
	// Tap (0,0): fetch at the base coordinate; the 3rd and 4th coordinate
	// slots of the tex operand are zero.
	mov.f32 	%f141, %f138;
	mov.f32 	%f142, %f140;
	mov.f32 	%f143, 0f00000000;   	// 0
	mov.f32 	%f144, %f143;
	mov.f32 	%f145, 0f00000000;   	// 0
	mov.f32 	%f146, %f145;
	tex.2d.v4.f32.f32 {%f147,%f148,%f149,%f150},[sPointTexture,{%f141,%f142,%f144,%f146}];
	.loc	6	397	0
	// %f151..%f154 are scratch registers holding the current tap's four
	// components; they are overwritten for every tap.
	mov.f32 	%f151, %f147;
	mov.f32 	%f152, %f148;
	mov.f32 	%f153, %f149;
	mov.f32 	%f154, %f150;
	.loc	6	62	0
	// Fractional position: fx = %f155 = sx - floor(sx), fy = %f156 likewise.
	sub.ftz.f32 	%f155, %f132, %f135;
	sub.ftz.f32 	%f156, %f134, %f136;
	// Outer-tap weight polynomial  wA(s) = s*(s*(1.2 - 0.6*s) - 0.6):
	//   %f167 = wA(fx),  %f168 = wA(fy)
	// (equals -0.6*s*(1-s)^2, i.e. the outer lobe of a cubic-convolution
	// kernel with a = -0.6.)
	mov.f32 	%f157, 0f3f99999a;   	// 1.2
	mov.f32 	%f158, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f159, %f158, %f155, %f157;
	mov.f32 	%f160, 0f3f99999a;   	// 1.2
	mov.f32 	%f161, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f162, %f161, %f156, %f160;
	mov.f32 	%f163, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f164, %f155, %f159, %f163;
	mov.f32 	%f165, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f166, %f156, %f162, %f165;
	mul.ftz.f32 	%f167, %f155, %f164;
	mul.ftz.f32 	%f168, %f156, %f166;
	// Tap (0,0) weight = wA(fx)*wA(fy); start the four running sums
	// %f170..%f173 (one per texture component).
	mul.ftz.f32 	%f169, %f167, %f168;
	mul.ftz.f32 	%f170, %f169, %f151;
	.loc	6	63	0
	mul.ftz.f32 	%f171, %f169, %f152;
	.loc	6	64	0
	mul.ftz.f32 	%f172, %f169, %f153;
	.loc	6	65	0
	mul.ftz.f32 	%f173, %f169, %f154;
	// Taps (1,0), (2,0), (3,0) of the 4x4 weighted sum: same y as the base tap
	// (%f140), x = %f138 + 1, + 2, + 3.  Each tap is multiplied by
	// (x weight) * %f168, where %f168 is the y weight computed earlier from the
	// y fraction, and added to the running per-component sums.
	// With fx = %f155 (x fraction) the x weights formed here are:
	//   %f191 = 1 + fx*fx*(1.4*fx - 2.4)
	//   %f216 = same polynomial evaluated at (1 - fx)
	//   %f239 = s*(s*(1.2 - 0.6*s) - 0.6) with s = 1 - fx
	.loc	6	443	0
	mov.f32 	%f174, 0f3f800000;   	// 1
	add.ftz.f32 	%f175, %f138, %f174;
	mov.f32 	%f176, %f175;
	mov.f32 	%f177, %f140;
	mov.f32 	%f178, 0f00000000;   	// 0
	mov.f32 	%f179, %f178;
	mov.f32 	%f180, 0f00000000;   	// 0
	mov.f32 	%f181, %f180;
	tex.2d.v4.f32.f32 {%f182,%f183,%f184,%f185},[sPointTexture,{%f176,%f177,%f179,%f181}];
	.loc	6	397	0
	mov.f32 	%f151, %f182;
	mov.f32 	%f152, %f183;
	mov.f32 	%f153, %f184;
	mov.f32 	%f154, %f185;
	.loc	6	53	0
	// Tap (1,0): weight %f192 = %f191 * %f168; sums -> %f193..%f196.
	mov.f32 	%f186, 0fc019999a;   	// -2.4
	mov.f32 	%f187, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f188, %f187, %f155, %f186;
	mul.ftz.f32 	%f189, %f155, %f188;
	mov.f32 	%f190, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f191, %f155, %f189, %f190;
	mul.ftz.f32 	%f192, %f191, %f168;
	fma.rn.ftz.f32 	%f193, %f151, %f192, %f170;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f194, %f152, %f192, %f171;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f195, %f153, %f192, %f172;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f196, %f154, %f192, %f173;
	.loc	6	444	0
	mov.f32 	%f197, 0f40000000;   	// 2
	add.ftz.f32 	%f198, %f138, %f197;
	mov.f32 	%f199, %f198;
	mov.f32 	%f200, %f140;
	mov.f32 	%f201, 0f00000000;   	// 0
	mov.f32 	%f202, %f201;
	mov.f32 	%f203, 0f00000000;   	// 0
	mov.f32 	%f204, %f203;
	tex.2d.v4.f32.f32 {%f205,%f206,%f207,%f208},[sPointTexture,{%f199,%f200,%f202,%f204}];
	.loc	6	397	0
	mov.f32 	%f151, %f205;
	mov.f32 	%f152, %f206;
	mov.f32 	%f153, %f207;
	mov.f32 	%f154, %f208;
	.loc	6	53	0
	// Tap (2,0): %f210 = 1 - fx; weight %f217 = %f168 * %f216;
	// sums -> %f218..%f221.
	mov.f32 	%f209, 0f3f800000;   	// 1
	sub.ftz.f32 	%f210, %f209, %f155;
	mov.f32 	%f211, 0fc019999a;   	// -2.4
	mov.f32 	%f212, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f213, %f212, %f210, %f211;
	mul.ftz.f32 	%f214, %f210, %f213;
	mov.f32 	%f215, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f216, %f210, %f214, %f215;
	mul.ftz.f32 	%f217, %f168, %f216;
	fma.rn.ftz.f32 	%f218, %f151, %f217, %f193;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f219, %f152, %f217, %f194;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f220, %f153, %f217, %f195;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f221, %f154, %f217, %f196;
	.loc	6	445	0
	mov.f32 	%f222, 0f40400000;   	// 3
	add.ftz.f32 	%f223, %f138, %f222;
	mov.f32 	%f224, %f223;
	mov.f32 	%f225, %f140;
	mov.f32 	%f226, 0f00000000;   	// 0
	mov.f32 	%f227, %f226;
	mov.f32 	%f228, 0f00000000;   	// 0
	mov.f32 	%f229, %f228;
	tex.2d.v4.f32.f32 {%f230,%f231,%f232,%f233},[sPointTexture,{%f224,%f225,%f227,%f229}];
	.loc	6	397	0
	mov.f32 	%f151, %f230;
	mov.f32 	%f152, %f231;
	mov.f32 	%f153, %f232;
	mov.f32 	%f154, %f233;
	.loc	6	53	0
	// Tap (3,0): weight %f240 = %f168 * %f239; sums -> %f241..%f244.
	mov.f32 	%f234, 0f3f99999a;   	// 1.2
	mov.f32 	%f235, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f236, %f235, %f210, %f234;
	mov.f32 	%f237, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f238, %f210, %f236, %f237;
	mul.ftz.f32 	%f239, %f210, %f238;
	mul.ftz.f32 	%f240, %f168, %f239;
	fma.rn.ftz.f32 	%f241, %f151, %f240, %f218;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f242, %f152, %f240, %f219;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f243, %f153, %f240, %f220;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f244, %f154, %f240, %f221;
	// Taps (0..3, 1) of the 4x4 weighted sum: y = %f246 = %f140 + 1, and
	// x = %f138, %f175, %f198, %f223 (base x and the +1/+2/+3 coordinates
	// computed earlier).
	// Row weight, with fy = %f156 (y fraction):
	//   %f262 = 1 + fy*fy*(1.4*fy - 2.4)
	// Each tap weight is %f262 times the matching x weight
	// (%f167, %f191, %f216, %f239) already held in registers.
	.loc	6	446	0
	mov.f32 	%f245, 0f3f800000;   	// 1
	add.ftz.f32 	%f246, %f140, %f245;
	mov.f32 	%f247, %f138;
	mov.f32 	%f248, %f246;
	mov.f32 	%f249, 0f00000000;   	// 0
	mov.f32 	%f250, %f249;
	mov.f32 	%f251, 0f00000000;   	// 0
	mov.f32 	%f252, %f251;
	tex.2d.v4.f32.f32 {%f253,%f254,%f255,%f256},[sPointTexture,{%f247,%f248,%f250,%f252}];
	.loc	6	397	0
	mov.f32 	%f151, %f253;
	mov.f32 	%f152, %f254;
	mov.f32 	%f153, %f255;
	mov.f32 	%f154, %f256;
	.loc	6	53	0
	// Tap (0,1): weight %f263 = %f167 * %f262; sums -> %f264..%f267.
	mov.f32 	%f257, 0fc019999a;   	// -2.4
	mov.f32 	%f258, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f259, %f258, %f156, %f257;
	mul.ftz.f32 	%f260, %f156, %f259;
	mov.f32 	%f261, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f262, %f156, %f260, %f261;
	mul.ftz.f32 	%f263, %f167, %f262;
	fma.rn.ftz.f32 	%f264, %f151, %f263, %f241;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f265, %f152, %f263, %f242;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f266, %f153, %f263, %f243;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f267, %f154, %f263, %f244;
	.loc	6	447	0
	mov.f32 	%f268, %f175;
	mov.f32 	%f269, %f246;
	mov.f32 	%f270, 0f00000000;   	// 0
	mov.f32 	%f271, %f270;
	mov.f32 	%f272, 0f00000000;   	// 0
	mov.f32 	%f273, %f272;
	tex.2d.v4.f32.f32 {%f274,%f275,%f276,%f277},[sPointTexture,{%f268,%f269,%f271,%f273}];
	.loc	6	397	0
	mov.f32 	%f151, %f274;
	mov.f32 	%f152, %f275;
	mov.f32 	%f153, %f276;
	mov.f32 	%f154, %f277;
	.loc	6	53	0
	// Tap (1,1): weight %f278 = %f191 * %f262; sums -> %f279..%f282.
	mul.ftz.f32 	%f278, %f191, %f262;
	fma.rn.ftz.f32 	%f279, %f151, %f278, %f264;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f280, %f152, %f278, %f265;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f281, %f153, %f278, %f266;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f282, %f154, %f278, %f267;
	.loc	6	448	0
	mov.f32 	%f283, %f198;
	mov.f32 	%f284, %f246;
	mov.f32 	%f285, 0f00000000;   	// 0
	mov.f32 	%f286, %f285;
	mov.f32 	%f287, 0f00000000;   	// 0
	mov.f32 	%f288, %f287;
	tex.2d.v4.f32.f32 {%f289,%f290,%f291,%f292},[sPointTexture,{%f283,%f284,%f286,%f288}];
	.loc	6	397	0
	mov.f32 	%f151, %f289;
	mov.f32 	%f152, %f290;
	mov.f32 	%f153, %f291;
	mov.f32 	%f154, %f292;
	.loc	6	53	0
	// Tap (2,1): weight %f293 = %f262 * %f216; sums -> %f294..%f297.
	mul.ftz.f32 	%f293, %f262, %f216;
	fma.rn.ftz.f32 	%f294, %f151, %f293, %f279;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f295, %f152, %f293, %f280;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f296, %f153, %f293, %f281;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f297, %f154, %f293, %f282;
	.loc	6	449	0
	mov.f32 	%f298, %f223;
	mov.f32 	%f299, %f246;
	mov.f32 	%f300, 0f00000000;   	// 0
	mov.f32 	%f301, %f300;
	mov.f32 	%f302, 0f00000000;   	// 0
	mov.f32 	%f303, %f302;
	tex.2d.v4.f32.f32 {%f304,%f305,%f306,%f307},[sPointTexture,{%f298,%f299,%f301,%f303}];
	.loc	6	397	0
	mov.f32 	%f151, %f304;
	mov.f32 	%f152, %f305;
	mov.f32 	%f153, %f306;
	mov.f32 	%f154, %f307;
	.loc	6	53	0
	// Tap (3,1): weight %f308 = %f262 * %f239; sums -> %f309..%f312.
	mul.ftz.f32 	%f308, %f262, %f239;
	fma.rn.ftz.f32 	%f309, %f151, %f308, %f294;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f310, %f152, %f308, %f295;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f311, %f153, %f308, %f296;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f312, %f154, %f308, %f297;
	// Taps (0..3, 2) of the 4x4 weighted sum: y = %f314 = %f140 + 2, and
	// x = %f138, %f175, %f198, %f223.
	// Row weight, with s = %f326 = 1 - fy (fy = %f156):
	//   %f332 = 1 + s*s*(1.4*s - 2.4)
	// Each tap weight is %f332 times the matching x weight
	// (%f167, %f191, %f216, %f239) already held in registers.
	.loc	6	450	0
	mov.f32 	%f313, 0f40000000;   	// 2
	add.ftz.f32 	%f314, %f140, %f313;
	mov.f32 	%f315, %f138;
	mov.f32 	%f316, %f314;
	mov.f32 	%f317, 0f00000000;   	// 0
	mov.f32 	%f318, %f317;
	mov.f32 	%f319, 0f00000000;   	// 0
	mov.f32 	%f320, %f319;
	tex.2d.v4.f32.f32 {%f321,%f322,%f323,%f324},[sPointTexture,{%f315,%f316,%f318,%f320}];
	.loc	6	397	0
	mov.f32 	%f151, %f321;
	mov.f32 	%f152, %f322;
	mov.f32 	%f153, %f323;
	mov.f32 	%f154, %f324;
	.loc	6	53	0
	// Tap (0,2): weight %f333 = %f167 * %f332; sums -> %f334..%f337.
	mov.f32 	%f325, 0f3f800000;   	// 1
	sub.ftz.f32 	%f326, %f325, %f156;
	mov.f32 	%f327, 0fc019999a;   	// -2.4
	mov.f32 	%f328, 0f3fb33333;   	// 1.4
	fma.rn.ftz.f32 	%f329, %f328, %f326, %f327;
	mul.ftz.f32 	%f330, %f326, %f329;
	mov.f32 	%f331, 0f3f800000;   	// 1
	fma.rn.ftz.f32 	%f332, %f326, %f330, %f331;
	mul.ftz.f32 	%f333, %f167, %f332;
	fma.rn.ftz.f32 	%f334, %f151, %f333, %f309;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f335, %f152, %f333, %f310;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f336, %f153, %f333, %f311;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f337, %f154, %f333, %f312;
	.loc	6	451	0
	mov.f32 	%f338, %f175;
	mov.f32 	%f339, %f314;
	mov.f32 	%f340, 0f00000000;   	// 0
	mov.f32 	%f341, %f340;
	mov.f32 	%f342, 0f00000000;   	// 0
	mov.f32 	%f343, %f342;
	tex.2d.v4.f32.f32 {%f344,%f345,%f346,%f347},[sPointTexture,{%f338,%f339,%f341,%f343}];
	.loc	6	397	0
	mov.f32 	%f151, %f344;
	mov.f32 	%f152, %f345;
	mov.f32 	%f153, %f346;
	mov.f32 	%f154, %f347;
	.loc	6	53	0
	// Tap (1,2): weight %f348 = %f191 * %f332; sums -> %f349..%f352.
	mul.ftz.f32 	%f348, %f191, %f332;
	fma.rn.ftz.f32 	%f349, %f151, %f348, %f334;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f350, %f152, %f348, %f335;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f351, %f153, %f348, %f336;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f352, %f154, %f348, %f337;
	.loc	6	452	0
	mov.f32 	%f353, %f198;
	mov.f32 	%f354, %f314;
	mov.f32 	%f355, 0f00000000;   	// 0
	mov.f32 	%f356, %f355;
	mov.f32 	%f357, 0f00000000;   	// 0
	mov.f32 	%f358, %f357;
	tex.2d.v4.f32.f32 {%f359,%f360,%f361,%f362},[sPointTexture,{%f353,%f354,%f356,%f358}];
	.loc	6	397	0
	mov.f32 	%f151, %f359;
	mov.f32 	%f152, %f360;
	mov.f32 	%f153, %f361;
	mov.f32 	%f154, %f362;
	.loc	6	53	0
	// Tap (2,2): weight %f363 = %f216 * %f332; sums -> %f364..%f367.
	mul.ftz.f32 	%f363, %f216, %f332;
	fma.rn.ftz.f32 	%f364, %f151, %f363, %f349;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f365, %f152, %f363, %f350;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f366, %f153, %f363, %f351;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f367, %f154, %f363, %f352;
	.loc	6	453	0
	mov.f32 	%f368, %f223;
	mov.f32 	%f369, %f314;
	mov.f32 	%f370, 0f00000000;   	// 0
	mov.f32 	%f371, %f370;
	mov.f32 	%f372, 0f00000000;   	// 0
	mov.f32 	%f373, %f372;
	tex.2d.v4.f32.f32 {%f374,%f375,%f376,%f377},[sPointTexture,{%f368,%f369,%f371,%f373}];
	.loc	6	397	0
	mov.f32 	%f151, %f374;
	mov.f32 	%f152, %f375;
	mov.f32 	%f153, %f376;
	mov.f32 	%f154, %f377;
	.loc	6	53	0
	// Tap (3,2): weight %f378 = %f239 * %f332; sums -> %f379..%f382.
	mul.ftz.f32 	%f378, %f239, %f332;
	fma.rn.ftz.f32 	%f379, %f151, %f378, %f364;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f380, %f152, %f378, %f365;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f381, %f153, %f378, %f366;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f382, %f154, %f378, %f367;
	// Taps (0..3, 3) of the 4x4 weighted sum: y = %f384 = %f140 + 3, and
	// x = %f138, %f175, %f198, %f223.
	// Row weight, with s = %f326 (= 1 - y fraction, computed earlier):
	//   %f400 = s*(s*(1.2 - 0.6*s) - 0.6)
	// Each tap weight is %f400 times the matching x weight
	// (%f167, %f191, %f216, %f239).  After the last tap the filtered texture
	// value is in %f447..%f450 (components 0..3 of the tex result).
	.loc	6	454	0
	mov.f32 	%f383, 0f40400000;   	// 3
	add.ftz.f32 	%f384, %f140, %f383;
	mov.f32 	%f385, %f138;
	mov.f32 	%f386, %f384;
	mov.f32 	%f387, 0f00000000;   	// 0
	mov.f32 	%f388, %f387;
	mov.f32 	%f389, 0f00000000;   	// 0
	mov.f32 	%f390, %f389;
	tex.2d.v4.f32.f32 {%f391,%f392,%f393,%f394},[sPointTexture,{%f385,%f386,%f388,%f390}];
	.loc	6	397	0
	mov.f32 	%f151, %f391;
	mov.f32 	%f152, %f392;
	mov.f32 	%f153, %f393;
	mov.f32 	%f154, %f394;
	.loc	6	53	0
	// Tap (0,3): weight %f401 = %f167 * %f400; sums -> %f402..%f405.
	mov.f32 	%f395, 0f3f99999a;   	// 1.2
	mov.f32 	%f396, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f397, %f396, %f326, %f395;
	mov.f32 	%f398, 0fbf19999a;   	// -0.6
	fma.rn.ftz.f32 	%f399, %f326, %f397, %f398;
	mul.ftz.f32 	%f400, %f326, %f399;
	mul.ftz.f32 	%f401, %f167, %f400;
	fma.rn.ftz.f32 	%f402, %f151, %f401, %f379;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f403, %f152, %f401, %f380;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f404, %f153, %f401, %f381;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f405, %f154, %f401, %f382;
	.loc	6	455	0
	mov.f32 	%f406, %f175;
	mov.f32 	%f407, %f384;
	mov.f32 	%f408, 0f00000000;   	// 0
	mov.f32 	%f409, %f408;
	mov.f32 	%f410, 0f00000000;   	// 0
	mov.f32 	%f411, %f410;
	tex.2d.v4.f32.f32 {%f412,%f413,%f414,%f415},[sPointTexture,{%f406,%f407,%f409,%f411}];
	.loc	6	397	0
	mov.f32 	%f151, %f412;
	mov.f32 	%f152, %f413;
	mov.f32 	%f153, %f414;
	mov.f32 	%f154, %f415;
	.loc	6	53	0
	// Tap (1,3): weight %f416 = %f191 * %f400; sums -> %f417..%f420.
	mul.ftz.f32 	%f416, %f191, %f400;
	fma.rn.ftz.f32 	%f417, %f151, %f416, %f402;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f418, %f152, %f416, %f403;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f419, %f153, %f416, %f404;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f420, %f154, %f416, %f405;
	.loc	6	456	0
	mov.f32 	%f421, %f198;
	mov.f32 	%f422, %f384;
	mov.f32 	%f423, 0f00000000;   	// 0
	mov.f32 	%f424, %f423;
	mov.f32 	%f425, 0f00000000;   	// 0
	mov.f32 	%f426, %f425;
	tex.2d.v4.f32.f32 {%f427,%f428,%f429,%f430},[sPointTexture,{%f421,%f422,%f424,%f426}];
	.loc	6	397	0
	mov.f32 	%f151, %f427;
	mov.f32 	%f152, %f428;
	mov.f32 	%f153, %f429;
	mov.f32 	%f154, %f430;
	.loc	6	53	0
	// Tap (2,3): weight %f431 = %f216 * %f400; sums -> %f432..%f435.
	mul.ftz.f32 	%f431, %f216, %f400;
	fma.rn.ftz.f32 	%f432, %f151, %f431, %f417;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f433, %f152, %f431, %f418;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f434, %f153, %f431, %f419;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f435, %f154, %f431, %f420;
	.loc	6	457	0
	mov.f32 	%f436, %f223;
	mov.f32 	%f437, %f384;
	mov.f32 	%f438, 0f00000000;   	// 0
	mov.f32 	%f439, %f438;
	mov.f32 	%f440, 0f00000000;   	// 0
	mov.f32 	%f441, %f440;
	tex.2d.v4.f32.f32 {%f442,%f443,%f444,%f445},[sPointTexture,{%f436,%f437,%f439,%f441}];
	.loc	6	397	0
	mov.f32 	%f151, %f442;
	mov.f32 	%f152, %f443;
	mov.f32 	%f153, %f444;
	mov.f32 	%f154, %f445;
	.loc	6	53	0
	// Tap (3,3): weight %f446 = %f239 * %f400; final sums -> %f447..%f450.
	mul.ftz.f32 	%f446, %f239, %f400;
	fma.rn.ftz.f32 	%f447, %f151, %f446, %f432;
	.loc	6	54	0
	fma.rn.ftz.f32 	%f448, %f152, %f446, %f433;
	.loc	6	55	0
	fma.rn.ftz.f32 	%f449, %f153, %f446, %f434;
	.loc	6	56	0
	fma.rn.ftz.f32 	%f450, %f154, %f446, %f435;
	// Scale the filtered texture value (%f447..%f450) by the coverage %f65,
	// giving the source pixel S = (%f451, %f452, %f453, %f454); component 3
	// (%f454) is used as alpha by the blending code that follows.
	.loc	6	536	0
	mul.ftz.f32 	%f451, %f65, %f447;
	mul.ftz.f32 	%f452, %f65, %f448;
	mul.ftz.f32 	%f453, %f65, %f449;
	mul.ftz.f32 	%f454, %f65, %f450;
	// Predicates reused further down:
	//   %p5 = (blendMode == 18)      %p6 = (inDeviceFormat == 0)
	//   %p7 = (inDoCompositeOver == 0), using %r19 loaded earlier
	ld.param.u32 	%r28, [__cudaparm_cuda_motion_renderquad_blendMode];
	mov.s32 	%r29, 18;
	setp.eq.s32 	%p5, %r28, %r29;
	ld.param.u32 	%r30, [__cudaparm_cuda_motion_renderquad_inDeviceFormat];
	mov.s32 	%r31, 0;
	setp.eq.s32 	%p6, %r30, %r31;
	// Destination element index %rd7 = dstPitch * y + x.
	ld.param.s32 	%r32, [__cudaparm_cuda_motion_renderquad_dstPitch];
	mul.lo.s32 	%r33, %r32, %r10;
	add.s32 	%r34, %r8, %r33;
	cvt.s64.s32 	%rd7, %r34;
	ld.param.u64 	%rd8, [__cudaparm_cuda_motion_renderquad_dstFrame];
	mov.u32 	%r35, 0;
	setp.eq.s32 	%p7, %r19, %r35;
	// Not compositing: skip the destination read and the blend
	// ($Lt_125_269570 lies outside this excerpt).
	@%p7 bra 	$Lt_125_269570;
	// Compositing: read the current destination pixel D into %f455..%f458.
	@!%p6 bra 	$Lt_125_270082;
	.loc	21	115	0
	// Format 0: 8 bytes per pixel, four 16-bit values converted f16 -> f32.
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r36,%r37,%r38,%r39}, [%rd10+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r36;
	cvt.ftz.f32.f16	%f455, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r37;
	cvt.ftz.f32.f16	%f456, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r38;
	cvt.ftz.f32.f16	%f457, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r39;
	cvt.ftz.f32.f16	%f458, %b1; }
	bra.uni 	$Lt_125_269826;
$Lt_125_270082:
	// Other formats: 16 bytes per pixel, four f32 values loaded directly.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f455,%f456,%f457,%f458}, [%rd12+0];
$Lt_125_269826:
	// Blend path taken only when blendMode == 18 (%p5); every other mode goes
	// to $Lt_125_270594.
	// With g = alphaGain, S = source (%f451..%f454), D = destination
	// (%f455..%f458):
	//   Sa = g * S[3]                        (%f460)
	//   k  = sat(D[3]) * (1 - Sa)            (%f464)
	//   A  = k + Sa                          (%f465)
	//   out[i] = spow( (k * spow(D[i], 2.2) + g * S[i]) / sat(A), 0.454545 )
	//   out[3] = sat(A)
	// where spow(x, p) = sign(x) * |x|^p, evaluated as ex2(p * lg2(|x|)) with
	// the approximate lg2/ex2 instructions.  The result overwrites
	// %f451..%f454.
	@!%p5 bra 	$Lt_125_270594;
	.loc	6	170	0
	ld.param.f32 	%f459, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f460, %f459, %f454;
	cvt.ftz.sat.f32.f32 	%f461, %f458;
	mov.f32 	%f462, 0f3f800000;   	// 1
	sub.ftz.f32 	%f463, %f462, %f460;
	mul.ftz.f32 	%f464, %f461, %f463;
	add.ftz.f32 	%f465, %f464, %f460;
	// A <= 8e-6: treat the result as fully transparent (all four zero).
	mov.f32 	%f466, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f467, %f465, %f466;
	mov.f32 	%f468, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p8, %f467, %f468;
	@!%p8 bra 	$Lt_125_271106;
	mov.f32 	%f469, 0f00000000;   	// 0
	mov.f32 	%f470, 0f00000000;   	// 0
	mov.f32 	%f471, 0f00000000;   	// 0
	mov.f32 	%f472, 0f00000000;   	// 0
	bra.uni 	$Lt_125_270850;
$Lt_125_271106:
	// %f479 = spow(D[0], 2.2)
	mov.f32 	%f473, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p9, %f455, %f473;
	@!%p9 bra 	$Lt_125_271362;
	.loc	5	234	0
	neg.ftz.f32 	%f474, %f455;
	lg2.approx.ftz.f32 	%f475, %f474;
	mov.f32 	%f476, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f477, %f475, %f476;
	ex2.approx.ftz.f32 	%f478, %f477;
	neg.ftz.f32 	%f479, %f478;
	bra.uni 	$LDWendi___log2f_302_74;
$Lt_125_271362:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f480, %f455;
	mov.f32 	%f481, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f482, %f480, %f481;
	ex2.approx.ftz.f32 	%f479, %f482;
$LDWendi___log2f_302_74:
	.loc	22	97	0
	// %f489 = spow(D[1], 2.2)
	mov.f32 	%f483, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p10, %f456, %f483;
	@!%p10 bra 	$Lt_125_271874;
	.loc	5	234	0
	neg.ftz.f32 	%f484, %f456;
	lg2.approx.ftz.f32 	%f485, %f484;
	mov.f32 	%f486, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f487, %f485, %f486;
	ex2.approx.ftz.f32 	%f488, %f487;
	neg.ftz.f32 	%f489, %f488;
	bra.uni 	$LDWendi___log2f_302_72;
$Lt_125_271874:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f490, %f456;
	mov.f32 	%f491, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f492, %f490, %f491;
	ex2.approx.ftz.f32 	%f489, %f492;
$LDWendi___log2f_302_72:
	.loc	22	98	0
	// %f499 = spow(D[2], 2.2)
	mov.f32 	%f493, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p11, %f457, %f493;
	@!%p11 bra 	$Lt_125_272386;
	.loc	5	234	0
	neg.ftz.f32 	%f494, %f457;
	lg2.approx.ftz.f32 	%f495, %f494;
	mov.f32 	%f496, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f497, %f495, %f496;
	ex2.approx.ftz.f32 	%f498, %f497;
	neg.ftz.f32 	%f499, %f498;
	bra.uni 	$LDWendi___log2f_302_70;
$Lt_125_272386:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f500, %f457;
	mov.f32 	%f501, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f502, %f500, %f501;
	ex2.approx.ftz.f32 	%f499, %f502;
$LDWendi___log2f_302_70:
	.loc	5	208	0
	// %f503 = sat(A), kept in %f504 as the output alpha.  The same
	// 8e-6 threshold is re-checked on the clamped value before dividing.
	cvt.ftz.sat.f32.f32 	%f503, %f465;
	mov.f32 	%f504, %f503;
	mov.f32 	%f505, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f506, %f503, %f505;
	mov.f32 	%f507, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p12, %f506, %f507;
	@%p12 bra 	$Lt_125_273154;
	.loc	5	213	0
	// %f511 / %f514 / %f517 = (k * decoded D[i] + g * S[i]) / sat(A)
	// for i = 2, 1, 0 respectively.
	rcp.approx.ftz.f32 	%f508, %f503;
	mul.ftz.f32 	%f509, %f459, %f453;
	fma.rn.ftz.f32 	%f510, %f464, %f499, %f509;
	mul.ftz.f32 	%f511, %f508, %f510;
	.loc	5	214	0
	mul.ftz.f32 	%f512, %f459, %f452;
	fma.rn.ftz.f32 	%f513, %f464, %f489, %f512;
	mul.ftz.f32 	%f514, %f508, %f513;
	.loc	5	215	0
	mul.ftz.f32 	%f515, %f459, %f451;
	fma.rn.ftz.f32 	%f516, %f464, %f479, %f515;
	mul.ftz.f32 	%f517, %f508, %f516;
	bra.uni 	$Lt_125_272898;
$Lt_125_273154:
	.loc	5	219	0
	mov.f32 	%f511, 0f00000000;   	// 0
	mov.f32 	%f514, 0f00000000;   	// 0
	mov.f32 	%f517, 0f00000000;   	// 0
	mov.f32 	%f504, 0f00000000;   	// 0
$Lt_125_272898:
	.loc	5	266	0
	// %f524 = spow(%f517, 0.454545)   (component 0)
	mov.f32 	%f518, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p13, %f517, %f518;
	@!%p13 bra 	$Lt_125_273410;
	.loc	5	242	0
	neg.ftz.f32 	%f519, %f517;
	lg2.approx.ftz.f32 	%f520, %f519;
	mov.f32 	%f521, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f522, %f520, %f521;
	ex2.approx.ftz.f32 	%f523, %f522;
	neg.ftz.f32 	%f524, %f523;
	bra.uni 	$LDWendi___log2f_302_68;
$Lt_125_273410:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f525, %f517;
	mov.f32 	%f526, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f527, %f525, %f526;
	ex2.approx.ftz.f32 	%f524, %f527;
$LDWendi___log2f_302_68:
	.loc	5	267	0
	// %f534 = spow(%f514, 0.454545)   (component 1)
	mov.f32 	%f528, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p14, %f514, %f528;
	@!%p14 bra 	$Lt_125_273922;
	.loc	5	242	0
	neg.ftz.f32 	%f529, %f514;
	lg2.approx.ftz.f32 	%f530, %f529;
	mov.f32 	%f531, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f532, %f530, %f531;
	ex2.approx.ftz.f32 	%f533, %f532;
	neg.ftz.f32 	%f534, %f533;
	bra.uni 	$LDWendi___log2f_302_66;
$Lt_125_273922:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f535, %f514;
	mov.f32 	%f536, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f537, %f535, %f536;
	ex2.approx.ftz.f32 	%f534, %f537;
$LDWendi___log2f_302_66:
	.loc	5	268	0
	// %f544 = spow(%f511, 0.454545)   (component 2)
	mov.f32 	%f538, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p15, %f511, %f538;
	@!%p15 bra 	$Lt_125_274434;
	.loc	5	242	0
	neg.ftz.f32 	%f539, %f511;
	lg2.approx.ftz.f32 	%f540, %f539;
	mov.f32 	%f541, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f542, %f540, %f541;
	ex2.approx.ftz.f32 	%f543, %f542;
	neg.ftz.f32 	%f544, %f543;
	bra.uni 	$LDWendi___log2f_302_64;
$Lt_125_274434:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f545, %f511;
	mov.f32 	%f546, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f547, %f545, %f546;
	ex2.approx.ftz.f32 	%f544, %f547;
$LDWendi___log2f_302_64:
	.loc	22	101	0
	// Gather the result: (%f472, %f471, %f470, %f469) = components 0,1,2,3.
	mov.f32 	%f472, %f524;
	mov.f32 	%f471, %f534;
	mov.f32 	%f470, %f544;
	mov.f32 	%f469, %f504;
$Lt_125_270850:
	.loc	6	170	0
	// Overwrite S with the blended pixel and continue at $Lt_125_314370
	// (outside this excerpt).
	mov.f32 	%f451, %f472;
	mov.f32 	%f452, %f471;
	mov.f32 	%f453, %f470;
	mov.f32 	%f454, %f469;
	bra.uni 	$Lt_125_314370;
$Lt_125_270594:
	// blendMode != 18: prepare the source pixel S (%f451..%f454) before the
	// per-mode blend.
	//   if S[3] <= 8e-6:  S = (0, 0, 0, 0)
	//   else:             S[i] = spow(S[i] / S[3], 0.454545)  for i = 0..2
	// where spow(x, p) = sign(x) * |x|^p, evaluated as ex2(p * lg2(|x|)) with
	// the approximate lg2/ex2 instructions.  S[3] itself is left unchanged.
	mov.f32 	%f548, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f549, %f454, %f548;
	mov.f32 	%f550, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p16, %f549, %f550;
	@!%p16 bra 	$Lt_125_275202;
	mov.f32 	%f454, 0f00000000;   	// 0
	mov.f32 	%f453, 0f00000000;   	// 0
	mov.f32 	%f452, 0f00000000;   	// 0
	mov.f32 	%f451, 0f00000000;   	// 0
	bra.uni 	$Lt_125_274946;
$Lt_125_275202:
	.loc	6	183	0
	// %f551 = 1 / S[3] (rcp.approx), shared by all three components.
	// Component 0: %f552 = S[0] / S[3]
	rcp.approx.ftz.f32 	%f551, %f454;
	mul.ftz.f32 	%f552, %f551, %f451;
	mov.f32 	%f553, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p17, %f552, %f553;
	@!%p17 bra 	$Lt_125_275458;
	.loc	5	242	0
	neg.ftz.f32 	%f554, %f552;
	lg2.approx.ftz.f32 	%f555, %f554;
	mov.f32 	%f556, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f557, %f555, %f556;
	ex2.approx.ftz.f32 	%f558, %f557;
	neg.ftz.f32 	%f559, %f558;
	bra.uni 	$LDWendi___log2f_302_62;
$Lt_125_275458:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f560, %f552;
	mov.f32 	%f561, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f562, %f560, %f561;
	ex2.approx.ftz.f32 	%f559, %f562;
$LDWendi___log2f_302_62:
	.loc	6	183	0
	mov.f32 	%f451, %f559;
	.loc	6	184	0
	// Component 1: %f563 = S[1] / S[3]
	mul.ftz.f32 	%f563, %f551, %f452;
	mov.f32 	%f564, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p18, %f563, %f564;
	@!%p18 bra 	$Lt_125_275970;
	.loc	5	242	0
	neg.ftz.f32 	%f565, %f563;
	lg2.approx.ftz.f32 	%f566, %f565;
	mov.f32 	%f567, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f568, %f566, %f567;
	ex2.approx.ftz.f32 	%f569, %f568;
	neg.ftz.f32 	%f570, %f569;
	bra.uni 	$LDWendi___log2f_302_60;
$Lt_125_275970:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f571, %f563;
	mov.f32 	%f572, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f573, %f571, %f572;
	ex2.approx.ftz.f32 	%f570, %f573;
$LDWendi___log2f_302_60:
	.loc	6	184	0
	mov.f32 	%f452, %f570;
	.loc	6	185	0
	// Component 2: %f574 = S[2] / S[3]
	mul.ftz.f32 	%f574, %f551, %f453;
	mov.f32 	%f575, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p19, %f574, %f575;
	@!%p19 bra 	$Lt_125_276482;
	.loc	5	242	0
	neg.ftz.f32 	%f576, %f574;
	lg2.approx.ftz.f32 	%f577, %f576;
	mov.f32 	%f578, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f579, %f577, %f578;
	ex2.approx.ftz.f32 	%f580, %f579;
	neg.ftz.f32 	%f581, %f580;
	bra.uni 	$LDWendi___log2f_302_58;
$Lt_125_276482:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f582, %f574;
	mov.f32 	%f583, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f584, %f582, %f583;
	ex2.approx.ftz.f32 	%f581, %f584;
$LDWendi___log2f_302_58:
	.loc	6	185	0
	mov.f32 	%f453, %f581;
$Lt_125_274946:
	// Blend-mode dispatch: a linear compare-and-branch chain on blendMode
	// (%r28).  Value -> target label:
	//    3 -> $Lt_125_258    11 -> $Lt_125_770    17 -> $Lt_125_1026
	//   22 -> $Lt_125_1282    6 -> $Lt_125_1538    1 -> $Lt_125_1794
	//   13 -> $Lt_125_2050    4 -> $Lt_125_2306    2 -> $Lt_125_2562
	//   14 -> $Lt_125_2818   12 -> $Lt_125_3074   19 -> $Lt_125_3330
	//   23 -> $Lt_125_3586    8 -> $Lt_125_3842   24 -> $Lt_125_4098
	//   15 -> $Lt_125_4354   20 -> $Lt_125_4610    9 -> $Lt_125_4866
	//    5 -> $Lt_125_5122    7 -> $Lt_125_5378   25 -> $Lt_125_5634
	//   26 -> $Lt_125_5890   10 -> $Lt_125_6146   21 -> $Lt_125_6402
	//    0 -> $Lt_125_6658   16 -> $Lt_125_6914
	// Any other value branches straight to $Lt_125_314370 without blending.
	// (Mode 18 never reaches this chain; it is handled before it.)
	.loc	6	189	0
	mov.u32 	%r40, 3;
	setp.eq.s32 	%p20, %r28, %r40;
	@%p20 bra 	$Lt_125_258;
	mov.u32 	%r41, 11;
	setp.eq.s32 	%p21, %r28, %r41;
	@%p21 bra 	$Lt_125_770;
	mov.u32 	%r42, 17;
	setp.eq.s32 	%p22, %r28, %r42;
	@%p22 bra 	$Lt_125_1026;
	mov.u32 	%r43, 22;
	setp.eq.s32 	%p23, %r28, %r43;
	@%p23 bra 	$Lt_125_1282;
	mov.u32 	%r44, 6;
	setp.eq.s32 	%p24, %r28, %r44;
	@%p24 bra 	$Lt_125_1538;
	mov.u32 	%r45, 1;
	setp.eq.s32 	%p25, %r28, %r45;
	@%p25 bra 	$Lt_125_1794;
	mov.u32 	%r46, 13;
	setp.eq.s32 	%p26, %r28, %r46;
	@%p26 bra 	$Lt_125_2050;
	mov.u32 	%r47, 4;
	setp.eq.s32 	%p27, %r28, %r47;
	@%p27 bra 	$Lt_125_2306;
	mov.u32 	%r48, 2;
	setp.eq.s32 	%p28, %r28, %r48;
	@%p28 bra 	$Lt_125_2562;
	mov.u32 	%r49, 14;
	setp.eq.s32 	%p29, %r28, %r49;
	@%p29 bra 	$Lt_125_2818;
	mov.u32 	%r50, 12;
	setp.eq.s32 	%p30, %r28, %r50;
	@%p30 bra 	$Lt_125_3074;
	mov.u32 	%r51, 19;
	setp.eq.s32 	%p31, %r28, %r51;
	@%p31 bra 	$Lt_125_3330;
	mov.u32 	%r52, 23;
	setp.eq.s32 	%p32, %r28, %r52;
	@%p32 bra 	$Lt_125_3586;
	mov.u32 	%r53, 8;
	setp.eq.s32 	%p33, %r28, %r53;
	@%p33 bra 	$Lt_125_3842;
	mov.u32 	%r54, 24;
	setp.eq.s32 	%p34, %r28, %r54;
	@%p34 bra 	$Lt_125_4098;
	mov.u32 	%r55, 15;
	setp.eq.s32 	%p35, %r28, %r55;
	@%p35 bra 	$Lt_125_4354;
	mov.u32 	%r56, 20;
	setp.eq.s32 	%p36, %r28, %r56;
	@%p36 bra 	$Lt_125_4610;
	mov.u32 	%r57, 9;
	setp.eq.s32 	%p37, %r28, %r57;
	@%p37 bra 	$Lt_125_4866;
	mov.u32 	%r58, 5;
	setp.eq.s32 	%p38, %r28, %r58;
	@%p38 bra 	$Lt_125_5122;
	mov.u32 	%r59, 7;
	setp.eq.s32 	%p39, %r28, %r59;
	@%p39 bra 	$Lt_125_5378;
	mov.u32 	%r60, 25;
	setp.eq.s32 	%p40, %r28, %r60;
	@%p40 bra 	$Lt_125_5634;
	mov.u32 	%r61, 26;
	setp.eq.s32 	%p41, %r28, %r61;
	@%p41 bra 	$Lt_125_5890;
	mov.u32 	%r62, 10;
	setp.eq.s32 	%p42, %r28, %r62;
	@%p42 bra 	$Lt_125_6146;
	mov.u32 	%r63, 21;
	setp.eq.s32 	%p43, %r28, %r63;
	@%p43 bra 	$Lt_125_6402;
	mov.u32 	%r64, 0;
	setp.eq.s32 	%p44, %r28, %r64;
	@%p44 bra 	$Lt_125_6658;
	mov.u32 	%r65, 16;
	setp.eq.s32 	%p45, %r28, %r65;
	@%p45 bra 	$Lt_125_6914;
	bra.uni 	$Lt_125_314370;
$Lt_125_258:
	// Blend mode 3.  With S = source (%f451..%f454), D = destination
	// (%f455..%f458), g = alphaGain:
	//   Sa = g * S[3]                (%f586)
	//   A  = Sa + D[3] - Sa * D[3]   (%f589, output alpha)
	//   if A <= 8e-6: result = (0, 0, 0, 0)
	//   else, with w = Sa / A:
	//     out[i] = D[i] * (1 - w)
	//              + w * ( S[i] * (1 - D[3]) + D[3] * min(D[i], S[i]) )
	// The per-component blend function is min(D, S).
	// NOTE(review): that is what a "darken" mode usually computes — confirm
	// the mode name against the host-side enum.
	.loc	22	469	0
	ld.param.f32 	%f585, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f585, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f588, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f588;
	mov.f32 	%f590, %f589;
	mov.f32 	%f591, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f592, %f589, %f591;
	mov.f32 	%f593, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p46, %f592, %f593;
	@!%p46 bra 	$Lt_125_277250;
	mov.f32 	%f594, 0f00000000;   	// 0
	mov.f32 	%f595, 0f00000000;   	// 0
	mov.f32 	%f596, 0f00000000;   	// 0
	mov.f32 	%f590, 0f00000000;   	// 0
	bra.uni 	$Lt_125_276994;
$Lt_125_277250:
	// %f598 = 1 - D[3];  %f599 = 1/A (rcp.approx);  %f600 = w;  %f603 = 1 - w
	mov.f32 	%f597, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f597, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f601, 0f3f800000;   	// 1
	mul.ftz.f32 	%f602, %f586, %f599;
	sub.ftz.f32 	%f603, %f601, %f602;
	// Component 0 -> %f596
	min.ftz.f32 	%f604, %f455, %f451;
	mul.ftz.f32 	%f605, %f458, %f604;
	fma.rn.ftz.f32 	%f606, %f451, %f598, %f605;
	mul.ftz.f32 	%f607, %f600, %f606;
	fma.rn.ftz.f32 	%f596, %f455, %f603, %f607;
	// Component 1 -> %f595
	min.ftz.f32 	%f608, %f456, %f452;
	mul.ftz.f32 	%f609, %f458, %f608;
	fma.rn.ftz.f32 	%f610, %f452, %f598, %f609;
	mul.ftz.f32 	%f611, %f600, %f610;
	fma.rn.ftz.f32 	%f595, %f456, %f603, %f611;
	// Component 2 -> %f594
	min.ftz.f32 	%f612, %f457, %f453;
	mul.ftz.f32 	%f613, %f458, %f612;
	fma.rn.ftz.f32 	%f614, %f453, %f598, %f613;
	mul.ftz.f32 	%f615, %f600, %f614;
	fma.rn.ftz.f32 	%f594, %f457, %f603, %f615;
$Lt_125_276994:
	.loc	6	191	0
	// Overwrite S with the blended pixel and continue at $Lt_125_314370
	// (outside this excerpt).
	mov.f32 	%f451, %f596;
	mov.f32 	%f452, %f595;
	mov.f32 	%f453, %f594;
	mov.f32 	%f454, %f590;
	bra.uni 	$Lt_125_314370;
$Lt_125_770:
	// Blend mode 11.  With S = source (%f451..%f454), D = destination
	// (%f455..%f458), g = alphaGain:
	//   Sa = g * S[3]                (%f586)
	//   A  = Sa + D[3] - Sa * D[3]   (%f589, output alpha)
	//   if A <= 8e-6: result = (0, 0, 0, 0)
	//   else, with w = Sa / A:
	//     out[i] = D[i] * (1 - w)
	//              + w * ( S[i] * (1 - D[3]) + D[3] * max(D[i], S[i]) )
	// The per-component blend function is max(D, S).
	// NOTE(review): that is what a "lighten" mode usually computes — confirm
	// the mode name against the host-side enum.
	.loc	22	470	0
	ld.param.f32 	%f616, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f616, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f617, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f617;
	mov.f32 	%f618, %f589;
	mov.f32 	%f619, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f620, %f589, %f619;
	mov.f32 	%f621, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p47, %f620, %f621;
	@!%p47 bra 	$Lt_125_277762;
	mov.f32 	%f622, 0f00000000;   	// 0
	mov.f32 	%f623, 0f00000000;   	// 0
	mov.f32 	%f624, 0f00000000;   	// 0
	mov.f32 	%f618, 0f00000000;   	// 0
	bra.uni 	$Lt_125_277506;
$Lt_125_277762:
	// %f598 = 1 - D[3];  %f599 = 1/A (rcp.approx);  %f600 = w;  %f603 = 1 - w
	mov.f32 	%f625, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f625, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f626, 0f3f800000;   	// 1
	mul.ftz.f32 	%f627, %f586, %f599;
	sub.ftz.f32 	%f603, %f626, %f627;
	// Component 0 -> %f624
	max.ftz.f32 	%f628, %f455, %f451;
	mul.ftz.f32 	%f629, %f458, %f628;
	fma.rn.ftz.f32 	%f630, %f451, %f598, %f629;
	mul.ftz.f32 	%f631, %f600, %f630;
	fma.rn.ftz.f32 	%f624, %f455, %f603, %f631;
	// Component 1 -> %f623
	max.ftz.f32 	%f632, %f456, %f452;
	mul.ftz.f32 	%f633, %f458, %f632;
	fma.rn.ftz.f32 	%f634, %f452, %f598, %f633;
	mul.ftz.f32 	%f635, %f600, %f634;
	fma.rn.ftz.f32 	%f623, %f456, %f603, %f635;
	// Component 2 -> %f622
	max.ftz.f32 	%f636, %f457, %f453;
	mul.ftz.f32 	%f637, %f458, %f636;
	fma.rn.ftz.f32 	%f638, %f453, %f598, %f637;
	mul.ftz.f32 	%f639, %f600, %f638;
	fma.rn.ftz.f32 	%f622, %f457, %f603, %f639;
$Lt_125_277506:
	.loc	6	192	0
	// Overwrite S with the blended pixel and continue at $Lt_125_314370
	// (outside this excerpt).
	mov.f32 	%f451, %f624;
	mov.f32 	%f452, %f623;
	mov.f32 	%f453, %f622;
	mov.f32 	%f454, %f618;
	bra.uni 	$Lt_125_314370;
$Lt_125_1026:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using the per-channel product, clamped to [0,1],
	// as the blend term.
	// NOTE(review): presumably a "multiply"-style blend mode -- confirm against
	// the original source referenced by the .loc directive (file 22, line 471).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	471	0
	ld.param.f32 	%f640, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f640, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f641, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f641;
	mov.f32 	%f642, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f643, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f644, %f589, %f643;
	mov.f32 	%f645, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p48, %f644, %f645;
	@!%p48 bra 	$Lt_125_278274;
	mov.f32 	%f646, 0f00000000;   	// 0
	mov.f32 	%f647, 0f00000000;   	// 0
	mov.f32 	%f648, 0f00000000;   	// 0
	mov.f32 	%f642, 0f00000000;   	// 0
	bra.uni 	$Lt_125_278018;
$Lt_125_278274:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f649, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f649, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f650, 0f3f800000;   	// 1
	mul.ftz.f32 	%f651, %f586, %f599;
	sub.ftz.f32 	%f603, %f650, %f651;
	// Channel 0: B = min(max(%f455*%f451, 0), 1)
	//            %f648 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	mul.ftz.f32 	%f652, %f455, %f451;
	mov.f32 	%f653, 0f00000000;   	// 0
	max.ftz.f32 	%f654, %f652, %f653;
	mov.f32 	%f655, 0f3f800000;   	// 1
	min.ftz.f32 	%f656, %f654, %f655;
	mul.ftz.f32 	%f657, %f458, %f656;
	fma.rn.ftz.f32 	%f658, %f451, %f598, %f657;
	mul.ftz.f32 	%f659, %f600, %f658;
	fma.rn.ftz.f32 	%f648, %f455, %f603, %f659;
	// Channel 1: same formula on (%f456, %f452) -> %f647
	mul.ftz.f32 	%f660, %f456, %f452;
	mov.f32 	%f661, 0f00000000;   	// 0
	max.ftz.f32 	%f662, %f660, %f661;
	mov.f32 	%f663, 0f3f800000;   	// 1
	min.ftz.f32 	%f664, %f662, %f663;
	mul.ftz.f32 	%f665, %f458, %f664;
	fma.rn.ftz.f32 	%f666, %f452, %f598, %f665;
	mul.ftz.f32 	%f667, %f600, %f666;
	fma.rn.ftz.f32 	%f647, %f456, %f603, %f667;
	// Channel 2: same formula on (%f457, %f453) -> %f646
	mul.ftz.f32 	%f668, %f457, %f453;
	mov.f32 	%f669, 0f00000000;   	// 0
	max.ftz.f32 	%f670, %f668, %f669;
	mov.f32 	%f671, 0f3f800000;   	// 1
	min.ftz.f32 	%f672, %f670, %f671;
	mul.ftz.f32 	%f673, %f458, %f672;
	fma.rn.ftz.f32 	%f674, %f453, %f598, %f673;
	mul.ftz.f32 	%f675, %f600, %f674;
	fma.rn.ftz.f32 	%f646, %f457, %f603, %f675;
$Lt_125_278018:
	// Join point: commit the three channels and the combined alpha (%f642).
	.loc	6	193	0
	mov.f32 	%f451, %f648;
	mov.f32 	%f452, %f647;
	mov.f32 	%f453, %f646;
	mov.f32 	%f454, %f642;
	bra.uni 	$Lt_125_314370;
$Lt_125_1282:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using B = clamp01(1 - (1-a)*(1-b)) as the blend
	// term, where a and b are the two channel values clamped to [1e-7, 1].
	// NOTE(review): presumably a "screen"-style blend mode -- confirm against
	// the original source referenced by the .loc directive (file 22, line 472).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	472	0
	ld.param.f32 	%f676, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f676, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f677, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f677;
	mov.f32 	%f678, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f679, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f680, %f589, %f679;
	mov.f32 	%f681, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p49, %f680, %f681;
	@!%p49 bra 	$Lt_125_278786;
	mov.f32 	%f682, 0f00000000;   	// 0
	mov.f32 	%f683, 0f00000000;   	// 0
	mov.f32 	%f684, 0f00000000;   	// 0
	mov.f32 	%f678, 0f00000000;   	// 0
	bra.uni 	$Lt_125_278530;
$Lt_125_278786:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f685, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f685, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f686, 0f3f800000;   	// 1
	mul.ftz.f32 	%f687, %f586, %f599;
	sub.ftz.f32 	%f603, %f686, %f687;
	// Channel 0:
	//   a = min(max(%f455, 1e-7), 1), b = min(max(%f451, 1e-7), 1)
	//   B = min(max(1 - (1-a)*(1-b), 0), 1)
	//   %f684 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	mov.f32 	%f688, 0f3f800000;   	// 1
	mov.f32 	%f689, 0f3f800000;   	// 1
	mov.f32 	%f690, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f691, %f455, %f690;
	mov.f32 	%f692, 0f3f800000;   	// 1
	min.ftz.f32 	%f693, %f691, %f692;
	sub.ftz.f32 	%f694, %f689, %f693;
	mov.f32 	%f695, 0f3f800000;   	// 1
	mov.f32 	%f696, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f697, %f451, %f696;
	mov.f32 	%f698, 0f3f800000;   	// 1
	min.ftz.f32 	%f699, %f697, %f698;
	sub.ftz.f32 	%f700, %f695, %f699;
	mul.ftz.f32 	%f701, %f694, %f700;
	sub.ftz.f32 	%f702, %f688, %f701;
	mov.f32 	%f703, 0f00000000;   	// 0
	max.ftz.f32 	%f704, %f702, %f703;
	mov.f32 	%f705, 0f3f800000;   	// 1
	min.ftz.f32 	%f706, %f704, %f705;
	mul.ftz.f32 	%f707, %f458, %f706;
	fma.rn.ftz.f32 	%f708, %f451, %f598, %f707;
	mul.ftz.f32 	%f709, %f600, %f708;
	fma.rn.ftz.f32 	%f684, %f455, %f603, %f709;
	// Channel 1: same formula on (%f456, %f452) -> %f683
	mov.f32 	%f710, 0f3f800000;   	// 1
	mov.f32 	%f711, 0f3f800000;   	// 1
	mov.f32 	%f712, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f713, %f456, %f712;
	mov.f32 	%f714, 0f3f800000;   	// 1
	min.ftz.f32 	%f715, %f713, %f714;
	sub.ftz.f32 	%f716, %f711, %f715;
	mov.f32 	%f717, 0f3f800000;   	// 1
	mov.f32 	%f718, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f719, %f452, %f718;
	mov.f32 	%f720, 0f3f800000;   	// 1
	min.ftz.f32 	%f721, %f719, %f720;
	sub.ftz.f32 	%f722, %f717, %f721;
	mul.ftz.f32 	%f723, %f716, %f722;
	sub.ftz.f32 	%f724, %f710, %f723;
	mov.f32 	%f725, 0f00000000;   	// 0
	max.ftz.f32 	%f726, %f724, %f725;
	mov.f32 	%f727, 0f3f800000;   	// 1
	min.ftz.f32 	%f728, %f726, %f727;
	mul.ftz.f32 	%f729, %f458, %f728;
	fma.rn.ftz.f32 	%f730, %f452, %f598, %f729;
	mul.ftz.f32 	%f731, %f600, %f730;
	fma.rn.ftz.f32 	%f683, %f456, %f603, %f731;
	// Channel 2: same formula on (%f457, %f453) -> %f682
	mov.f32 	%f732, 0f3f800000;   	// 1
	mov.f32 	%f733, 0f3f800000;   	// 1
	mov.f32 	%f734, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f735, %f457, %f734;
	mov.f32 	%f736, 0f3f800000;   	// 1
	min.ftz.f32 	%f737, %f735, %f736;
	sub.ftz.f32 	%f738, %f733, %f737;
	mov.f32 	%f739, 0f3f800000;   	// 1
	mov.f32 	%f740, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f741, %f453, %f740;
	mov.f32 	%f742, 0f3f800000;   	// 1
	min.ftz.f32 	%f743, %f741, %f742;
	sub.ftz.f32 	%f744, %f739, %f743;
	mul.ftz.f32 	%f745, %f738, %f744;
	sub.ftz.f32 	%f746, %f732, %f745;
	mov.f32 	%f747, 0f00000000;   	// 0
	max.ftz.f32 	%f748, %f746, %f747;
	mov.f32 	%f749, 0f3f800000;   	// 1
	min.ftz.f32 	%f750, %f748, %f749;
	mul.ftz.f32 	%f751, %f458, %f750;
	fma.rn.ftz.f32 	%f752, %f453, %f598, %f751;
	mul.ftz.f32 	%f753, %f600, %f752;
	fma.rn.ftz.f32 	%f682, %f457, %f603, %f753;
$Lt_125_278530:
	// Join point: commit the three channels and the combined alpha (%f678).
	.loc	6	194	0
	mov.f32 	%f451, %f684;
	mov.f32 	%f452, %f683;
	mov.f32 	%f453, %f682;
	mov.f32 	%f454, %f678;
	bra.uni 	$Lt_125_314370;
$Lt_125_1538:
	// Dispatch arm: selects one of the two whole pixels -- (%f455..%f458) or
	// (%f451..%f454) -- instead of mixing them. The choice is made by comparing
	// the scaled alpha against an integer derived from a hash of %r8 and %r10.
	// NOTE(review): presumably a "dissolve"-style mode driven by an inlined
	// pseudo-random function (the join label mentions _Z4Randj) -- confirm
	// against the original source (file 22, lines 526-537; file 21, lines 143-147).
	.loc	22	526	0
	ld.param.f32 	%f754, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f754, %f454;
	// Early-out 1: (%f586 - 8e-6) <= 0 -> result is pixel (%f455..%f458).
	mov.f32 	%f755, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f756, %f586, %f755;
	mov.f32 	%f757, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f756, %f757;
	@!%p50 bra 	$Lt_125_234498;
	.loc	22	528	0
	mov.f32 	%f758, %f455;
	mov.f32 	%f759, %f456;
	mov.f32 	%f760, %f457;
	mov.f32 	%f761, %f458;
	bra.uni 	$LDWendi__Z4Randj_302_56;
$Lt_125_234498:
	// Early-out 2: (%f586 + 8e-6) >= 1 -> result is pixel (%f451..%f454),
	// with its alpha %f454 passed through unscaled.
	.loc	22	530	0
	mov.f32 	%f762, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f763, %f586, %f762;
	mov.f32 	%f764, 0f3f800000;   	// 1
	setp.ge.ftz.f32 	%p51, %f763, %f764;
	@!%p51 bra 	$Lt_125_234754;
	.loc	22	532	0
	mov.f32 	%f758, %f451;
	mov.f32 	%f759, %f452;
	mov.f32 	%f760, %f453;
	mov.f32 	%f761, %f454;
	bra.uni 	$LDWendi__Z4Randj_302_56;
$Lt_125_234754:
	// Integer mixing sequence over %r8 and %r10 (subtract / shift / xor rounds)
	// producing the hash value %r102. The statement order is significant: each
	// round consumes the previous round's outputs.
	// NOTE(review): %r8 and %r10 are set outside this arm; presumably pixel
	// coordinates or a seed -- verify at their definition.
	.loc	21	143	0
	mov.s32 	%r66, 1;
	sub.s32 	%r67, %r66, %r8;
	shr.u32 	%r68, %r10, 13;
	sub.u32 	%r69, %r8, %r10;
	sub.u32 	%r70, %r67, %r10;
	xor.b32 	%r71, %r68, %r70;
	shl.b32 	%r72, %r71, 8;
	sub.u32 	%r73, %r69, %r71;
	sub.u32 	%r74, %r10, %r71;
	xor.b32 	%r75, %r72, %r73;
	shr.u32 	%r76, %r75, 13;
	sub.u32 	%r77, %r74, %r75;
	sub.u32 	%r78, %r71, %r75;
	xor.b32 	%r79, %r76, %r77;
	shr.u32 	%r80, %r79, 12;
	sub.u32 	%r81, %r78, %r79;
	xor.b32 	%r82, %r80, %r81;
	sub.u32 	%r83, %r75, %r79;
	sub.u32 	%r84, %r83, %r82;
	shl.b32 	%r85, %r82, 16;
	xor.b32 	%r86, %r84, %r85;
	.loc	21	144	0
	sub.u32 	%r87, %r79, %r82;
	sub.u32 	%r88, %r87, %r86;
	shr.u32 	%r89, %r86, 5;
	xor.b32 	%r90, %r88, %r89;
	.loc	21	145	0
	sub.u32 	%r91, %r82, %r86;
	sub.u32 	%r92, %r91, %r90;
	shr.u32 	%r93, %r90, 3;
	xor.b32 	%r94, %r92, %r93;
	.loc	21	146	0
	sub.u32 	%r95, %r86, %r90;
	sub.u32 	%r96, %r95, %r94;
	shl.b32 	%r97, %r94, 10;
	xor.b32 	%r98, %r96, %r97;
	.loc	21	147	0
	sub.u32 	%r99, %r90, %r94;
	sub.u32 	%r100, %r99, %r98;
	shr.u32 	%r101, %r98, 15;
	xor.b32 	%r102, %r100, %r101;
	.loc	22	537	0
	// %r103 = trunc-toward-zero(%f586 * 32767): the alpha as an integer threshold.
	mov.f32 	%f765, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f766, %f586, %f765;
	cvt.rzi.ftz.s32.f32 	%r103, %f766;
	// Two values derived from the hash %r102:
	//   hi = ((%r102*1103515245 + 12345) >> 16) & 255, shifted left by 7
	//   lo = ((%r102*(-1029531031) - 740551042) >> 16) & 255
	// %r113 = hi ^ lo is the value compared against the threshold.
	mul.lo.u32 	%r104, %r102, 1103515245;
	add.u32 	%r105, %r104, 12345;
	shr.u32 	%r106, %r105, 16;
	and.b32 	%r107, %r106, 255;
	shl.b32 	%r108, %r107, 7;
	mul.lo.u32 	%r109, %r102, -1029531031;
	sub.u32 	%r110, %r109, 740551042;
	shr.u32 	%r111, %r110, 16;
	and.b32 	%r112, %r111, 255;
	xor.b32 	%r113, %r108, %r112;
	// Signed compare: threshold < %r113 -> take pixel (%f455..%f458),
	// otherwise take pixel (%f451..%f454).
	setp.lt.s32 	%p52, %r103, %r113;
	@%p52 bra 	$Lt_125_279298;
	mov.f32 	%f767, %f451;
	mov.f32 	%f768, %f452;
	mov.f32 	%f769, %f453;
	mov.f32 	%f770, %f454;
	bra.uni 	$Lt_125_279042;
$Lt_125_279298:
	mov.f32 	%f767, %f455;
	mov.f32 	%f768, %f456;
	mov.f32 	%f769, %f457;
	mov.f32 	%f770, %f458;
$Lt_125_279042:
	mov.f32 	%f758, %f767;
	mov.f32 	%f759, %f768;
	mov.f32 	%f760, %f769;
	mov.f32 	%f761, %f770;
$LDWendi__Z4Randj_302_56:
	// Join point for all three paths: commit the selected pixel into
	// %f451..%f454 and jump to the shared label $Lt_125_314370.
	.loc	6	195	0
	mov.f32 	%f451, %f758;
	mov.f32 	%f452, %f759;
	mov.f32 	%f453, %f760;
	mov.f32 	%f454, %f761;
	bra.uni 	$Lt_125_314370;
$Lt_125_1794:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using B = clamp01(1 - (1-a)/b) as the blend
	// term, where a is the first channel value clamped to [0,1] and b is the
	// second clamped to [1e-7, 1] (the lower clamp keeps the divisor non-zero).
	// NOTE(review): presumably a "color burn"-style blend mode -- confirm
	// against the original source (file 22, line 473).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	473	0
	ld.param.f32 	%f771, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f771, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f772, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f772;
	mov.f32 	%f773, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f774, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f775, %f589, %f774;
	mov.f32 	%f776, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p53, %f775, %f776;
	@!%p53 bra 	$Lt_125_279810;
	mov.f32 	%f777, 0f00000000;   	// 0
	mov.f32 	%f778, 0f00000000;   	// 0
	mov.f32 	%f779, 0f00000000;   	// 0
	mov.f32 	%f773, 0f00000000;   	// 0
	bra.uni 	$Lt_125_279554;
$Lt_125_279810:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f780, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f780, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f781, 0f3f800000;   	// 1
	mul.ftz.f32 	%f782, %f586, %f599;
	sub.ftz.f32 	%f603, %f781, %f782;
	// Channel 0:
	//   a = min(max(%f455, 0), 1), b = min(max(%f451, 1e-7), 1)
	//   B = min(max(1 - (1-a)/b, 0), 1)   (approximate division)
	//   %f779 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	mov.f32 	%f783, 0f3f800000;   	// 1
	mov.f32 	%f784, 0f3f800000;   	// 1
	mov.f32 	%f785, 0f00000000;   	// 0
	max.ftz.f32 	%f786, %f455, %f785;
	mov.f32 	%f787, 0f3f800000;   	// 1
	min.ftz.f32 	%f788, %f786, %f787;
	sub.ftz.f32 	%f789, %f784, %f788;
	mov.f32 	%f790, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f791, %f451, %f790;
	mov.f32 	%f792, 0f3f800000;   	// 1
	min.ftz.f32 	%f793, %f791, %f792;
	div.approx.ftz.f32 	%f794, %f789, %f793;
	sub.ftz.f32 	%f795, %f783, %f794;
	mov.f32 	%f796, 0f00000000;   	// 0
	max.ftz.f32 	%f797, %f795, %f796;
	mov.f32 	%f798, 0f3f800000;   	// 1
	min.ftz.f32 	%f799, %f797, %f798;
	mul.ftz.f32 	%f800, %f458, %f799;
	fma.rn.ftz.f32 	%f801, %f451, %f598, %f800;
	mul.ftz.f32 	%f802, %f600, %f801;
	fma.rn.ftz.f32 	%f779, %f455, %f603, %f802;
	// Channel 1: same formula on (%f456, %f452) -> %f778
	mov.f32 	%f803, 0f3f800000;   	// 1
	mov.f32 	%f804, 0f3f800000;   	// 1
	mov.f32 	%f805, 0f00000000;   	// 0
	max.ftz.f32 	%f806, %f456, %f805;
	mov.f32 	%f807, 0f3f800000;   	// 1
	min.ftz.f32 	%f808, %f806, %f807;
	sub.ftz.f32 	%f809, %f804, %f808;
	mov.f32 	%f810, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f811, %f452, %f810;
	mov.f32 	%f812, 0f3f800000;   	// 1
	min.ftz.f32 	%f813, %f811, %f812;
	div.approx.ftz.f32 	%f814, %f809, %f813;
	sub.ftz.f32 	%f815, %f803, %f814;
	mov.f32 	%f816, 0f00000000;   	// 0
	max.ftz.f32 	%f817, %f815, %f816;
	mov.f32 	%f818, 0f3f800000;   	// 1
	min.ftz.f32 	%f819, %f817, %f818;
	mul.ftz.f32 	%f820, %f458, %f819;
	fma.rn.ftz.f32 	%f821, %f452, %f598, %f820;
	mul.ftz.f32 	%f822, %f600, %f821;
	fma.rn.ftz.f32 	%f778, %f456, %f603, %f822;
	// Channel 2: same formula on (%f457, %f453) -> %f777
	mov.f32 	%f823, 0f3f800000;   	// 1
	mov.f32 	%f824, 0f3f800000;   	// 1
	mov.f32 	%f825, 0f00000000;   	// 0
	max.ftz.f32 	%f826, %f457, %f825;
	mov.f32 	%f827, 0f3f800000;   	// 1
	min.ftz.f32 	%f828, %f826, %f827;
	sub.ftz.f32 	%f829, %f824, %f828;
	mov.f32 	%f830, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f831, %f453, %f830;
	mov.f32 	%f832, 0f3f800000;   	// 1
	min.ftz.f32 	%f833, %f831, %f832;
	div.approx.ftz.f32 	%f834, %f829, %f833;
	sub.ftz.f32 	%f835, %f823, %f834;
	mov.f32 	%f836, 0f00000000;   	// 0
	max.ftz.f32 	%f837, %f835, %f836;
	mov.f32 	%f838, 0f3f800000;   	// 1
	min.ftz.f32 	%f839, %f837, %f838;
	mul.ftz.f32 	%f840, %f458, %f839;
	fma.rn.ftz.f32 	%f841, %f453, %f598, %f840;
	mul.ftz.f32 	%f842, %f600, %f841;
	fma.rn.ftz.f32 	%f777, %f457, %f603, %f842;
$Lt_125_279554:
	// Join point: commit the three channels and the combined alpha (%f773).
	.loc	6	196	0
	mov.f32 	%f451, %f779;
	mov.f32 	%f452, %f778;
	mov.f32 	%f453, %f777;
	mov.f32 	%f454, %f773;
	bra.uni 	$Lt_125_314370;
$Lt_125_2050:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using B = clamp01(a + b - 1) as the blend term,
	// where a and b are the two channel values clamped to [0,1].
	// NOTE(review): presumably a "linear burn"-style blend mode -- confirm
	// against the original source (file 22, line 474).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	474	0
	ld.param.f32 	%f843, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f843, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f844, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f844;
	mov.f32 	%f845, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f846, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f847, %f589, %f846;
	mov.f32 	%f848, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p54, %f847, %f848;
	@!%p54 bra 	$Lt_125_280322;
	mov.f32 	%f849, 0f00000000;   	// 0
	mov.f32 	%f850, 0f00000000;   	// 0
	mov.f32 	%f851, 0f00000000;   	// 0
	mov.f32 	%f845, 0f00000000;   	// 0
	bra.uni 	$Lt_125_280066;
$Lt_125_280322:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f852, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f852, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f853, 0f3f800000;   	// 1
	mul.ftz.f32 	%f854, %f586, %f599;
	sub.ftz.f32 	%f603, %f853, %f854;
	// Channel 0:
	//   a = min(max(%f455, 0), 1), b = min(max(%f451, 0), 1)
	//   B = min(max(a + b - 1, 0), 1)
	//   %f851 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	mov.f32 	%f855, 0f00000000;   	// 0
	max.ftz.f32 	%f856, %f455, %f855;
	mov.f32 	%f857, 0f3f800000;   	// 1
	min.ftz.f32 	%f858, %f856, %f857;
	mov.f32 	%f859, 0f00000000;   	// 0
	max.ftz.f32 	%f860, %f451, %f859;
	mov.f32 	%f861, 0f3f800000;   	// 1
	min.ftz.f32 	%f862, %f860, %f861;
	add.ftz.f32 	%f863, %f858, %f862;
	mov.f32 	%f864, 0fbf800000;   	// -1
	add.ftz.f32 	%f865, %f863, %f864;
	mov.f32 	%f866, 0f00000000;   	// 0
	max.ftz.f32 	%f867, %f865, %f866;
	mov.f32 	%f868, 0f3f800000;   	// 1
	min.ftz.f32 	%f869, %f867, %f868;
	mul.ftz.f32 	%f870, %f458, %f869;
	fma.rn.ftz.f32 	%f871, %f451, %f598, %f870;
	mul.ftz.f32 	%f872, %f600, %f871;
	fma.rn.ftz.f32 	%f851, %f455, %f603, %f872;
	// Channel 1: same formula on (%f456, %f452) -> %f850
	mov.f32 	%f873, 0f00000000;   	// 0
	max.ftz.f32 	%f874, %f456, %f873;
	mov.f32 	%f875, 0f3f800000;   	// 1
	min.ftz.f32 	%f876, %f874, %f875;
	mov.f32 	%f877, 0f00000000;   	// 0
	max.ftz.f32 	%f878, %f452, %f877;
	mov.f32 	%f879, 0f3f800000;   	// 1
	min.ftz.f32 	%f880, %f878, %f879;
	add.ftz.f32 	%f881, %f876, %f880;
	mov.f32 	%f882, 0fbf800000;   	// -1
	add.ftz.f32 	%f883, %f881, %f882;
	mov.f32 	%f884, 0f00000000;   	// 0
	max.ftz.f32 	%f885, %f883, %f884;
	mov.f32 	%f886, 0f3f800000;   	// 1
	min.ftz.f32 	%f887, %f885, %f886;
	mul.ftz.f32 	%f888, %f458, %f887;
	fma.rn.ftz.f32 	%f889, %f452, %f598, %f888;
	mul.ftz.f32 	%f890, %f600, %f889;
	fma.rn.ftz.f32 	%f850, %f456, %f603, %f890;
	// Channel 2: same formula on (%f457, %f453) -> %f849
	mov.f32 	%f891, 0f00000000;   	// 0
	max.ftz.f32 	%f892, %f457, %f891;
	mov.f32 	%f893, 0f3f800000;   	// 1
	min.ftz.f32 	%f894, %f892, %f893;
	mov.f32 	%f895, 0f00000000;   	// 0
	max.ftz.f32 	%f896, %f453, %f895;
	mov.f32 	%f897, 0f3f800000;   	// 1
	min.ftz.f32 	%f898, %f896, %f897;
	add.ftz.f32 	%f899, %f894, %f898;
	mov.f32 	%f900, 0fbf800000;   	// -1
	add.ftz.f32 	%f901, %f899, %f900;
	mov.f32 	%f902, 0f00000000;   	// 0
	max.ftz.f32 	%f903, %f901, %f902;
	mov.f32 	%f904, 0f3f800000;   	// 1
	min.ftz.f32 	%f905, %f903, %f904;
	mul.ftz.f32 	%f906, %f458, %f905;
	fma.rn.ftz.f32 	%f907, %f453, %f598, %f906;
	mul.ftz.f32 	%f908, %f600, %f907;
	fma.rn.ftz.f32 	%f849, %f457, %f603, %f908;
$Lt_125_280066:
	// Join point: commit the three channels and the combined alpha (%f845).
	.loc	6	197	0
	mov.f32 	%f451, %f851;
	mov.f32 	%f452, %f850;
	mov.f32 	%f453, %f849;
	mov.f32 	%f454, %f845;
	bra.uni 	$Lt_125_314370;
$Lt_125_2306:
	// Dispatch arm: compares a weighted-sum "luma" of pixel (%f455,%f456,%f457)
	// against that of pixel (%f451,%f452,%f453). When the first is strictly
	// greater, pixel (%f451..) is composited over it with the plain alpha
	// formula; otherwise pixel (%f455..%f458) is kept unchanged.
	// Weights come from kRGB32f_To_601YPbPr when %r11 <= 720 and from
	// kRGB32f_To_709YPbPr otherwise.
	// NOTE(review): presumably a "darker color"-style mode, with %r11 being a
	// frame dimension that picks SD (601) vs HD (709) coefficients -- confirm
	// against the original source (file 22, lines 555-608).
	.loc	6	198	0
	ld.param.f32 	%f909, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f909, %f454;
	// Early-out 1: (%f586 - 8e-6) <= 0 -> result is pixel (%f455..%f458).
	mov.f32 	%f910, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f911, %f586, %f910;
	mov.f32 	%f912, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p55, %f911, %f912;
	@!%p55 bra 	$Lt_125_280834;
	.loc	22	608	0
	mov.f32 	%f913, %f455;
	mov.f32 	%f914, %f456;
	mov.f32 	%f915, %f457;
	mov.f32 	%f916, %f458;
	bra.uni 	$Lt_125_281602;
$Lt_125_280834:
	// Early-out 2: (%f458 - 8e-6) <= 0 -> result is (%f451,%f452,%f453) with
	// the gain-scaled alpha %f586.
	mov.f32 	%f917, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f918, %f458, %f917;
	mov.f32 	%f919, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p56, %f918, %f919;
	@!%p56 bra 	$Lt_125_281346;
	mov.f32 	%f913, %f451;
	mov.f32 	%f914, %f452;
	mov.f32 	%f915, %f453;
	mov.f32 	%f916, %f586;
	bra.uni 	$Lt_125_281602;
$Lt_125_281346:
	// Signed compare %r11 > 720 selects the 709 coefficient path below;
	// otherwise fall through to the 601 coefficients.
	mov.u32 	%r114, 720;
	setp.gt.s32 	%p57, %r11, %r114;
	@%p57 bra 	$Lt_125_281858;
	.loc	22	555	0
	// --- 601 path ---
	// luma(p) = sat( c[+8]*p.ch0 + c[+0]*p.ch2 + c[+4]*p.ch1 ), saturated to [0,1].
	// %f926 = luma of (%f455,%f456,%f457); %f930 = luma of (%f451,%f452,%f453).
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f923, %f456, %f922;
	fma.rn.ftz.f32 	%f924, %f921, %f457, %f923;
	fma.rn.ftz.f32 	%f925, %f920, %f455, %f924;
	cvt.ftz.sat.f32.f32 	%f926, %f925;
	mul.ftz.f32 	%f927, %f922, %f452;
	fma.rn.ftz.f32 	%f928, %f921, %f453, %f927;
	fma.rn.ftz.f32 	%f929, %f920, %f451, %f928;
	cvt.ftz.sat.f32.f32 	%f930, %f929;
	// Only composite when %f926 > %f930; otherwise keep pixel (%f455..%f458).
	setp.gt.ftz.f32 	%p58, %f926, %f930;
	@!%p58 bra 	$Lt_125_236546;
	.loc	22	468	0
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f931, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f931;
	mov.f32 	%f932, %f589;
	// If (%f589 - 8e-6) <= 0 the composite result is forced to zero.
	mov.f32 	%f933, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f934, %f589, %f933;
	mov.f32 	%f935, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p59, %f934, %f935;
	@!%p59 bra 	$Lt_125_282370;
	mov.f32 	%f936, 0f00000000;   	// 0
	mov.f32 	%f937, 0f00000000;   	// 0
	mov.f32 	%f938, 0f00000000;   	// 0
	mov.f32 	%f932, 0f00000000;   	// 0
	bra.uni 	$Lt_125_282114;
$Lt_125_282370:
	// %f598 = 1 - %f458; %f599 = approximate 1/%f589;
	// %f600 = %f586/%f589; %f603 = 1 - %f586/%f589.
	// Per channel: out = %f455*%f603 + %f600*(%f451*%f458 + (1-%f458)*%f451).
	// The inner term is the generic blend form with the blend value equal to
	// the second pixel's own channel, so it equals %f451 up to rounding.
	mov.f32 	%f939, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f939, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f940, 0f3f800000;   	// 1
	mul.ftz.f32 	%f941, %f586, %f599;
	sub.ftz.f32 	%f603, %f940, %f941;
	mul.ftz.f32 	%f942, %f598, %f451;
	fma.rn.ftz.f32 	%f943, %f451, %f458, %f942;
	mul.ftz.f32 	%f944, %f600, %f943;
	fma.rn.ftz.f32 	%f938, %f455, %f603, %f944;
	mul.ftz.f32 	%f945, %f598, %f452;
	fma.rn.ftz.f32 	%f946, %f452, %f458, %f945;
	mul.ftz.f32 	%f947, %f600, %f946;
	fma.rn.ftz.f32 	%f937, %f456, %f603, %f947;
	mul.ftz.f32 	%f948, %f598, %f453;
	fma.rn.ftz.f32 	%f949, %f453, %f458, %f948;
	mul.ftz.f32 	%f950, %f600, %f949;
	fma.rn.ftz.f32 	%f936, %f457, %f603, %f950;
$Lt_125_282114:
	.loc	22	557	0
	mov.f32 	%f951, %f938;
	mov.f32 	%f952, %f937;
	mov.f32 	%f953, %f936;
	mov.f32 	%f954, %f932;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_302_54;
$Lt_125_236546:
	// Luma test failed: keep pixel (%f455..%f458) as the 601-path result.
	.loc	22	561	0
	mov.f32 	%f951, %f455;
	mov.f32 	%f952, %f456;
	mov.f32 	%f953, %f457;
	mov.f32 	%f954, %f458;
$LDWendi__Z10GetLuma6018PixelRGB_302_54:
	.loc	22	608	0
	mov.f32 	%f913, %f951;
	mov.f32 	%f914, %f952;
	mov.f32 	%f915, %f953;
	mov.f32 	%f916, %f954;
	bra.uni 	$Lt_125_281602;
$Lt_125_281858:
	// --- 709 path ---
	// Same structure as the 601 path, with weights from kRGB32f_To_709YPbPr.
	// %f961 = luma of (%f455,%f456,%f457); %f965 = luma of (%f451,%f452,%f453).
	.loc	22	569	0
	ld.const.f32 	%f955, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f956, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f957, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f958, %f456, %f957;
	fma.rn.ftz.f32 	%f959, %f956, %f457, %f958;
	fma.rn.ftz.f32 	%f960, %f955, %f455, %f959;
	cvt.ftz.sat.f32.f32 	%f961, %f960;
	mul.ftz.f32 	%f962, %f957, %f452;
	fma.rn.ftz.f32 	%f963, %f956, %f453, %f962;
	fma.rn.ftz.f32 	%f964, %f955, %f451, %f963;
	cvt.ftz.sat.f32.f32 	%f965, %f964;
	// Only composite when %f961 > %f965; otherwise keep pixel (%f455..%f458).
	setp.gt.ftz.f32 	%p60, %f961, %f965;
	@!%p60 bra 	$Lt_125_237058;
	.loc	22	468	0
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f966, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f966;
	mov.f32 	%f967, %f589;
	// If (%f589 - 8e-6) <= 0 the composite result is forced to zero.
	mov.f32 	%f968, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f969, %f589, %f968;
	mov.f32 	%f970, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p61, %f969, %f970;
	@!%p61 bra 	$Lt_125_282882;
	mov.f32 	%f971, 0f00000000;   	// 0
	mov.f32 	%f972, 0f00000000;   	// 0
	mov.f32 	%f973, 0f00000000;   	// 0
	mov.f32 	%f967, 0f00000000;   	// 0
	bra.uni 	$Lt_125_282626;
$Lt_125_282882:
	// Same per-channel composite as in the 601 path.
	mov.f32 	%f974, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f974, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f975, 0f3f800000;   	// 1
	mul.ftz.f32 	%f976, %f586, %f599;
	sub.ftz.f32 	%f603, %f975, %f976;
	mul.ftz.f32 	%f977, %f598, %f451;
	fma.rn.ftz.f32 	%f978, %f451, %f458, %f977;
	mul.ftz.f32 	%f979, %f600, %f978;
	fma.rn.ftz.f32 	%f973, %f455, %f603, %f979;
	mul.ftz.f32 	%f980, %f598, %f452;
	fma.rn.ftz.f32 	%f981, %f452, %f458, %f980;
	mul.ftz.f32 	%f982, %f600, %f981;
	fma.rn.ftz.f32 	%f972, %f456, %f603, %f982;
	mul.ftz.f32 	%f983, %f598, %f453;
	fma.rn.ftz.f32 	%f984, %f453, %f458, %f983;
	mul.ftz.f32 	%f985, %f600, %f984;
	fma.rn.ftz.f32 	%f971, %f457, %f603, %f985;
$Lt_125_282626:
	.loc	22	571	0
	mov.f32 	%f986, %f973;
	mov.f32 	%f987, %f972;
	mov.f32 	%f988, %f971;
	mov.f32 	%f989, %f967;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_302_52;
$Lt_125_237058:
	// Luma test failed: keep pixel (%f455..%f458) as the 709-path result.
	.loc	22	575	0
	mov.f32 	%f986, %f455;
	mov.f32 	%f987, %f456;
	mov.f32 	%f988, %f457;
	mov.f32 	%f989, %f458;
$LDWendi__Z10GetLuma7098PixelRGB_302_52:
	.loc	22	608	0
	mov.f32 	%f913, %f986;
	mov.f32 	%f914, %f987;
	mov.f32 	%f915, %f988;
	mov.f32 	%f916, %f989;
$Lt_125_281602:
$Lt_125_281090:
$Lt_125_280578:
	// Join point for every path of this arm: commit (%f913..%f916) into
	// %f451..%f454 and jump to the shared label $Lt_125_314370.
	.loc	6	198	0
	mov.f32 	%f451, %f913;
	mov.f32 	%f452, %f914;
	mov.f32 	%f453, %f915;
	mov.f32 	%f454, %f916;
	bra.uni 	$Lt_125_314370;
$Lt_125_2562:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using B = clamp01(a / (1 - b)) as the blend
	// term, where a is the first channel value clamped to [0,1] and b is the
	// second clamped to [0, 0.99999] (the upper clamp keeps the divisor non-zero).
	// NOTE(review): presumably a "color dodge"-style blend mode -- confirm
	// against the original source (file 22, line 475).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	475	0
	ld.param.f32 	%f990, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f990, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f991, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f991;
	mov.f32 	%f992, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f993, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f994, %f589, %f993;
	mov.f32 	%f995, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p62, %f994, %f995;
	@!%p62 bra 	$Lt_125_283394;
	mov.f32 	%f996, 0f00000000;   	// 0
	mov.f32 	%f997, 0f00000000;   	// 0
	mov.f32 	%f998, 0f00000000;   	// 0
	mov.f32 	%f992, 0f00000000;   	// 0
	bra.uni 	$Lt_125_283138;
$Lt_125_283394:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f999, 0f3f800000;   	// 1
	sub.ftz.f32 	%f598, %f999, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1000, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1001, %f586, %f599;
	sub.ftz.f32 	%f603, %f1000, %f1001;
	// Channel 0:
	//   a = min(max(%f455, 0), 1), b = min(max(%f451, 0), 0.99999)
	//   B = min(max(a / (1 - b), 0), 1)   (approximate division)
	//   %f998 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	mov.f32 	%f1002, 0f00000000;  	// 0
	max.ftz.f32 	%f1003, %f455, %f1002;
	mov.f32 	%f1004, 0f3f800000;  	// 1
	min.ftz.f32 	%f1005, %f1003, %f1004;
	mov.f32 	%f1006, 0f3f800000;  	// 1
	mov.f32 	%f1007, 0f00000000;  	// 0
	max.ftz.f32 	%f1008, %f451, %f1007;
	mov.f32 	%f1009, 0f3f7fff58;  	// 0.99999
	min.ftz.f32 	%f1010, %f1008, %f1009;
	sub.ftz.f32 	%f1011, %f1006, %f1010;
	div.approx.ftz.f32 	%f1012, %f1005, %f1011;
	mov.f32 	%f1013, 0f00000000;  	// 0
	max.ftz.f32 	%f1014, %f1012, %f1013;
	mov.f32 	%f1015, 0f3f800000;  	// 1
	min.ftz.f32 	%f1016, %f1014, %f1015;
	mul.ftz.f32 	%f1017, %f458, %f1016;
	fma.rn.ftz.f32 	%f1018, %f451, %f598, %f1017;
	mul.ftz.f32 	%f1019, %f600, %f1018;
	fma.rn.ftz.f32 	%f998, %f455, %f603, %f1019;
	// Channel 1: same formula on (%f456, %f452) -> %f997
	mov.f32 	%f1020, 0f00000000;  	// 0
	max.ftz.f32 	%f1021, %f456, %f1020;
	mov.f32 	%f1022, 0f3f800000;  	// 1
	min.ftz.f32 	%f1023, %f1021, %f1022;
	mov.f32 	%f1024, 0f3f800000;  	// 1
	mov.f32 	%f1025, 0f00000000;  	// 0
	max.ftz.f32 	%f1026, %f452, %f1025;
	mov.f32 	%f1027, 0f3f7fff58;  	// 0.99999
	min.ftz.f32 	%f1028, %f1026, %f1027;
	sub.ftz.f32 	%f1029, %f1024, %f1028;
	div.approx.ftz.f32 	%f1030, %f1023, %f1029;
	mov.f32 	%f1031, 0f00000000;  	// 0
	max.ftz.f32 	%f1032, %f1030, %f1031;
	mov.f32 	%f1033, 0f3f800000;  	// 1
	min.ftz.f32 	%f1034, %f1032, %f1033;
	mul.ftz.f32 	%f1035, %f458, %f1034;
	fma.rn.ftz.f32 	%f1036, %f452, %f598, %f1035;
	mul.ftz.f32 	%f1037, %f600, %f1036;
	fma.rn.ftz.f32 	%f997, %f456, %f603, %f1037;
	// Channel 2: same formula on (%f457, %f453) -> %f996
	mov.f32 	%f1038, 0f00000000;  	// 0
	max.ftz.f32 	%f1039, %f457, %f1038;
	mov.f32 	%f1040, 0f3f800000;  	// 1
	min.ftz.f32 	%f1041, %f1039, %f1040;
	mov.f32 	%f1042, 0f3f800000;  	// 1
	mov.f32 	%f1043, 0f00000000;  	// 0
	max.ftz.f32 	%f1044, %f453, %f1043;
	mov.f32 	%f1045, 0f3f7fff58;  	// 0.99999
	min.ftz.f32 	%f1046, %f1044, %f1045;
	sub.ftz.f32 	%f1047, %f1042, %f1046;
	div.approx.ftz.f32 	%f1048, %f1041, %f1047;
	mov.f32 	%f1049, 0f00000000;  	// 0
	max.ftz.f32 	%f1050, %f1048, %f1049;
	mov.f32 	%f1051, 0f3f800000;  	// 1
	min.ftz.f32 	%f1052, %f1050, %f1051;
	mul.ftz.f32 	%f1053, %f458, %f1052;
	fma.rn.ftz.f32 	%f1054, %f453, %f598, %f1053;
	mul.ftz.f32 	%f1055, %f600, %f1054;
	fma.rn.ftz.f32 	%f996, %f457, %f603, %f1055;
$Lt_125_283138:
	// Join point: commit the three channels and the combined alpha (%f992).
	.loc	6	199	0
	mov.f32 	%f451, %f998;
	mov.f32 	%f452, %f997;
	mov.f32 	%f453, %f996;
	mov.f32 	%f454, %f992;
	bra.uni 	$Lt_125_314370;
$Lt_125_2818:
	// Dispatch arm: composites pixel (%f455,%f456,%f457,%f458) with pixel
	// (%f451,%f452,%f453,%f454) using the per-channel sum, clamped to [0,1],
	// as the blend term.
	// NOTE(review): presumably an "add" / "linear dodge"-style blend mode --
	// confirm against the original source (file 22, line 476).
	// The result is written back into %f451..%f454 before jumping to the shared
	// label $Lt_125_314370.
	.loc	22	476	0
	ld.param.f32 	%f1056, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f1056, %f454;
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha of the two pixels)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1057, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1057;
	mov.f32 	%f1058, %f589;
	// If (%f589 - 8e-6) <= 0 the whole result is forced to zero.
	mov.f32 	%f1059, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1060, %f589, %f1059;
	mov.f32 	%f1061, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p63, %f1060, %f1061;
	@!%p63 bra 	$Lt_125_283906;
	mov.f32 	%f1062, 0f00000000;  	// 0
	mov.f32 	%f1063, 0f00000000;  	// 0
	mov.f32 	%f1064, 0f00000000;  	// 0
	mov.f32 	%f1058, 0f00000000;  	// 0
	bra.uni 	$Lt_125_283650;
$Lt_125_283906:
	// %f598 = 1 - %f458
	// %f599 = approximate 1/%f589
	// %f600 = %f586/%f589, %f603 = 1 - %f586/%f589
	mov.f32 	%f1065, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1065, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1066, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1067, %f586, %f599;
	sub.ftz.f32 	%f603, %f1066, %f1067;
	// Channel 0: B = min(max(%f455 + %f451, 0), 1)
	//            %f1064 = %f455*%f603 + %f600*(%f451*%f598 + %f458*B)
	add.ftz.f32 	%f1068, %f455, %f451;
	mov.f32 	%f1069, 0f00000000;  	// 0
	max.ftz.f32 	%f1070, %f1068, %f1069;
	mov.f32 	%f1071, 0f3f800000;  	// 1
	min.ftz.f32 	%f1072, %f1070, %f1071;
	mul.ftz.f32 	%f1073, %f458, %f1072;
	fma.rn.ftz.f32 	%f1074, %f451, %f598, %f1073;
	mul.ftz.f32 	%f1075, %f600, %f1074;
	fma.rn.ftz.f32 	%f1064, %f455, %f603, %f1075;
	// Channel 1: same formula on (%f456, %f452) -> %f1063
	add.ftz.f32 	%f1076, %f456, %f452;
	mov.f32 	%f1077, 0f00000000;  	// 0
	max.ftz.f32 	%f1078, %f1076, %f1077;
	mov.f32 	%f1079, 0f3f800000;  	// 1
	min.ftz.f32 	%f1080, %f1078, %f1079;
	mul.ftz.f32 	%f1081, %f458, %f1080;
	fma.rn.ftz.f32 	%f1082, %f452, %f598, %f1081;
	mul.ftz.f32 	%f1083, %f600, %f1082;
	fma.rn.ftz.f32 	%f1063, %f456, %f603, %f1083;
	// Channel 2: same formula on (%f457, %f453) -> %f1062
	add.ftz.f32 	%f1084, %f457, %f453;
	mov.f32 	%f1085, 0f00000000;  	// 0
	max.ftz.f32 	%f1086, %f1084, %f1085;
	mov.f32 	%f1087, 0f3f800000;  	// 1
	min.ftz.f32 	%f1088, %f1086, %f1087;
	mul.ftz.f32 	%f1089, %f458, %f1088;
	fma.rn.ftz.f32 	%f1090, %f453, %f598, %f1089;
	mul.ftz.f32 	%f1091, %f600, %f1090;
	fma.rn.ftz.f32 	%f1062, %f457, %f603, %f1091;
$Lt_125_283650:
	// Join point: commit the three channels and the combined alpha (%f1058).
	.loc	6	200	0
	mov.f32 	%f451, %f1064;
	mov.f32 	%f452, %f1063;
	mov.f32 	%f453, %f1062;
	mov.f32 	%f454, %f1058;
	bra.uni 	$Lt_125_314370;
$Lt_125_3074:
	// Dispatch arm: compares a weighted-sum "luma" of pixel (%f455,%f456,%f457)
	// against that of pixel (%f451,%f452,%f453). When the first is strictly
	// less, pixel (%f451..) is composited over it with the plain alpha formula;
	// otherwise pixel (%f455..%f458) is kept unchanged. This is the same shape
	// as the $Lt_125_2306 arm with the luma comparison reversed (lt vs gt).
	// Weights come from kRGB32f_To_601YPbPr when %r11 <= 720 and from
	// kRGB32f_To_709YPbPr otherwise.
	// NOTE(review): presumably a "lighter color"-style mode, with %r11 being a
	// frame dimension that picks SD (601) vs HD (709) coefficients -- confirm
	// against the original source (file 22, lines 584-609).
	.loc	6	201	0
	ld.param.f32 	%f1092, [__cudaparm_cuda_motion_renderquad_alphaGain];
	// %f586 = alphaGain * %f454
	mul.ftz.f32 	%f586, %f1092, %f454;
	// Early-out 1: (%f586 - 8e-6) <= 0 -> result is pixel (%f455..%f458).
	mov.f32 	%f1093, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1094, %f586, %f1093;
	mov.f32 	%f1095, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p64, %f1094, %f1095;
	@!%p64 bra 	$Lt_125_284418;
	.loc	22	609	0
	mov.f32 	%f1096, %f455;
	mov.f32 	%f1097, %f456;
	mov.f32 	%f1098, %f457;
	mov.f32 	%f1099, %f458;
	bra.uni 	$Lt_125_285186;
$Lt_125_284418:
	// Early-out 2: (%f458 - 8e-6) <= 0 -> result is (%f451,%f452,%f453) with
	// the gain-scaled alpha %f586.
	mov.f32 	%f1100, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1101, %f458, %f1100;
	mov.f32 	%f1102, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p65, %f1101, %f1102;
	@!%p65 bra 	$Lt_125_284930;
	mov.f32 	%f1096, %f451;
	mov.f32 	%f1097, %f452;
	mov.f32 	%f1098, %f453;
	mov.f32 	%f1099, %f586;
	bra.uni 	$Lt_125_285186;
$Lt_125_284930:
	// Signed compare %r11 > 720 selects the 709 coefficient path below;
	// otherwise fall through to the 601 coefficients.
	mov.u32 	%r115, 720;
	setp.gt.s32 	%p66, %r11, %r115;
	@%p66 bra 	$Lt_125_285442;
	.loc	22	584	0
	// --- 601 path ---
	// luma(p) = sat( c[+8]*p.ch0 + c[+0]*p.ch2 + c[+4]*p.ch1 ), saturated to [0,1].
	// %f1106 = luma of (%f455,%f456,%f457); %f1110 = luma of (%f451,%f452,%f453).
	// The three coefficients are reloaded into %f920..%f922, the same registers
	// the $Lt_125_2306 arm uses for them.
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1103, %f456, %f922;
	fma.rn.ftz.f32 	%f1104, %f921, %f457, %f1103;
	fma.rn.ftz.f32 	%f1105, %f920, %f455, %f1104;
	cvt.ftz.sat.f32.f32 	%f1106, %f1105;
	mul.ftz.f32 	%f1107, %f922, %f452;
	fma.rn.ftz.f32 	%f1108, %f921, %f453, %f1107;
	fma.rn.ftz.f32 	%f1109, %f920, %f451, %f1108;
	cvt.ftz.sat.f32.f32 	%f1110, %f1109;
	// Only composite when %f1106 < %f1110; otherwise keep pixel (%f455..%f458).
	setp.lt.ftz.f32 	%p67, %f1106, %f1110;
	@!%p67 bra 	$Lt_125_238850;
	.loc	22	468	0
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1111, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1111;
	mov.f32 	%f1112, %f589;
	// If (%f589 - 8e-6) <= 0 the composite result is forced to zero.
	mov.f32 	%f1113, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1114, %f589, %f1113;
	mov.f32 	%f1115, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p68, %f1114, %f1115;
	@!%p68 bra 	$Lt_125_285954;
	mov.f32 	%f1116, 0f00000000;  	// 0
	mov.f32 	%f1117, 0f00000000;  	// 0
	mov.f32 	%f1118, 0f00000000;  	// 0
	mov.f32 	%f1112, 0f00000000;  	// 0
	bra.uni 	$Lt_125_285698;
$Lt_125_285954:
	// %f598 = 1 - %f458; %f599 = approximate 1/%f589;
	// %f600 = %f586/%f589; %f603 = 1 - %f586/%f589.
	// Per channel: out = %f455*%f603 + %f600*(%f451*%f458 + (1-%f458)*%f451).
	// The inner term is the generic blend form with the blend value equal to
	// the second pixel's own channel, so it equals %f451 up to rounding.
	mov.f32 	%f1119, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1119, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1120, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1121, %f586, %f599;
	sub.ftz.f32 	%f603, %f1120, %f1121;
	mul.ftz.f32 	%f1122, %f598, %f451;
	fma.rn.ftz.f32 	%f1123, %f451, %f458, %f1122;
	mul.ftz.f32 	%f1124, %f600, %f1123;
	fma.rn.ftz.f32 	%f1118, %f455, %f603, %f1124;
	mul.ftz.f32 	%f1125, %f598, %f452;
	fma.rn.ftz.f32 	%f1126, %f452, %f458, %f1125;
	mul.ftz.f32 	%f1127, %f600, %f1126;
	fma.rn.ftz.f32 	%f1117, %f456, %f603, %f1127;
	mul.ftz.f32 	%f1128, %f598, %f453;
	fma.rn.ftz.f32 	%f1129, %f453, %f458, %f1128;
	mul.ftz.f32 	%f1130, %f600, %f1129;
	fma.rn.ftz.f32 	%f1116, %f457, %f603, %f1130;
$Lt_125_285698:
	.loc	22	586	0
	mov.f32 	%f1131, %f1118;
	mov.f32 	%f1132, %f1117;
	mov.f32 	%f1133, %f1116;
	mov.f32 	%f1134, %f1112;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_302_50;
$Lt_125_238850:
	// Luma test failed: keep pixel (%f455..%f458) as the 601-path result.
	.loc	22	590	0
	mov.f32 	%f1131, %f455;
	mov.f32 	%f1132, %f456;
	mov.f32 	%f1133, %f457;
	mov.f32 	%f1134, %f458;
$LDWendi__Z10GetLuma6018PixelRGB_302_50:
	.loc	22	609	0
	mov.f32 	%f1096, %f1131;
	mov.f32 	%f1097, %f1132;
	mov.f32 	%f1098, %f1133;
	mov.f32 	%f1099, %f1134;
	bra.uni 	$Lt_125_285186;
$Lt_125_285442:
	// --- 709 path ---
	// Same structure as the 601 path, with weights from kRGB32f_To_709YPbPr.
	// %f1141 = luma of (%f455,%f456,%f457); %f1145 = luma of (%f451,%f452,%f453).
	.loc	22	598	0
	ld.const.f32 	%f1135, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f1136, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f1137, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f1138, %f456, %f1137;
	fma.rn.ftz.f32 	%f1139, %f1136, %f457, %f1138;
	fma.rn.ftz.f32 	%f1140, %f1135, %f455, %f1139;
	cvt.ftz.sat.f32.f32 	%f1141, %f1140;
	mul.ftz.f32 	%f1142, %f1137, %f452;
	fma.rn.ftz.f32 	%f1143, %f1136, %f453, %f1142;
	fma.rn.ftz.f32 	%f1144, %f1135, %f451, %f1143;
	cvt.ftz.sat.f32.f32 	%f1145, %f1144;
	// Only composite when %f1141 < %f1145; otherwise keep pixel (%f455..%f458).
	setp.lt.ftz.f32 	%p69, %f1141, %f1145;
	@!%p69 bra 	$Lt_125_239362;
	.loc	22	468	0
	// %f589 = %f586 + %f458 - %f586*%f458 (combined alpha)
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1146, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1146;
	mov.f32 	%f1147, %f589;
	// If (%f589 - 8e-6) <= 0 the composite result is forced to zero.
	mov.f32 	%f1148, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1149, %f589, %f1148;
	mov.f32 	%f1150, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p70, %f1149, %f1150;
	@!%p70 bra 	$Lt_125_286466;
	mov.f32 	%f1151, 0f00000000;  	// 0
	mov.f32 	%f1152, 0f00000000;  	// 0
	mov.f32 	%f1153, 0f00000000;  	// 0
	mov.f32 	%f1147, 0f00000000;  	// 0
	bra.uni 	$Lt_125_286210;
$Lt_125_286466:
	// Same per-channel composite as in the 601 path.
	mov.f32 	%f1154, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1154, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1155, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1156, %f586, %f599;
	sub.ftz.f32 	%f603, %f1155, %f1156;
	mul.ftz.f32 	%f1157, %f598, %f451;
	fma.rn.ftz.f32 	%f1158, %f451, %f458, %f1157;
	mul.ftz.f32 	%f1159, %f600, %f1158;
	fma.rn.ftz.f32 	%f1153, %f455, %f603, %f1159;
	mul.ftz.f32 	%f1160, %f598, %f452;
	fma.rn.ftz.f32 	%f1161, %f452, %f458, %f1160;
	mul.ftz.f32 	%f1162, %f600, %f1161;
	fma.rn.ftz.f32 	%f1152, %f456, %f603, %f1162;
	mul.ftz.f32 	%f1163, %f598, %f453;
	fma.rn.ftz.f32 	%f1164, %f453, %f458, %f1163;
	mul.ftz.f32 	%f1165, %f600, %f1164;
	fma.rn.ftz.f32 	%f1151, %f457, %f603, %f1165;
$Lt_125_286210:
	.loc	22	600	0
	mov.f32 	%f1166, %f1153;
	mov.f32 	%f1167, %f1152;
	mov.f32 	%f1168, %f1151;
	mov.f32 	%f1169, %f1147;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_302_48;
$Lt_125_239362:
	// Luma test failed: keep pixel (%f455..%f458) as the 709-path result.
	.loc	22	604	0
	mov.f32 	%f1166, %f455;
	mov.f32 	%f1167, %f456;
	mov.f32 	%f1168, %f457;
	mov.f32 	%f1169, %f458;
$LDWendi__Z10GetLuma7098PixelRGB_302_48:
	.loc	22	609	0
	mov.f32 	%f1096, %f1166;
	mov.f32 	%f1097, %f1167;
	mov.f32 	%f1098, %f1168;
	mov.f32 	%f1099, %f1169;
$Lt_125_285186:
$Lt_125_284674:
$Lt_125_284162:
	// Join point for every path of this arm: commit (%f1096..%f1099) into
	// %f451..%f454 and jump to the shared label $Lt_125_314370.
	.loc	6	201	0
	mov.f32 	%f451, %f1096;
	mov.f32 	%f452, %f1097;
	mov.f32 	%f453, %f1098;
	mov.f32 	%f454, %f1099;
	bra.uni 	$Lt_125_314370;
$Lt_125_3330:
	// Blend case (.loc file 22, line 477; per-channel helper at line 373).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*clamp01(blend))
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend (a = clamp01(B.c), b = clamp01(A.c)):
	//   a <= 0.5 ? 2*b*a : 1 - 2*(1-b)*(1-a)
	// NOTE(review): if B is the backdrop this is the usual "overlay" formula
	// -- confirm the layer roles and mode name against the CUDA source.
	.loc	22	477	0
	ld.param.f32 	%f1170, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1170, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1171, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1171;
	mov.f32 	%f1172, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1173, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1174, %f589, %f1173;
	mov.f32 	%f1175, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p71, %f1174, %f1175;
	@!%p71 bra 	$Lt_125_286978;
	mov.f32 	%f1176, 0f00000000;  	// 0
	mov.f32 	%f1177, 0f00000000;  	// 0
	mov.f32 	%f1178, 0f00000000;  	// 0
	mov.f32 	%f1172, 0f00000000;  	// 0
	bra.uni 	$Lt_125_286722;
$Lt_125_286978:
	// --- first channel: a = %f1184 = clamp01(%f455), b = %f1186 = clamp01(%f451)
	.loc	22	373	0
	mov.f32 	%f1179, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1179;
	mov.f32 	%f1181, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1181;
	mov.f32 	%f1183, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1183;
	mov.f32 	%f1185, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1185;
	mov.f32 	%f1187, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p72, %f1184, %f1187;
	@!%p72 bra 	$Lt_125_287490;
	// a <= 0.5: blend = a * 2b
	add.ftz.f32 	%f1188, %f1186, %f1186;
	mul.ftz.f32 	%f1189, %f1184, %f1188;
	bra.uni 	$Lt_125_287234;
$Lt_125_287490:
	// a > 0.5: blend = 1 - 2*(1-b)*(1-a)
	mov.f32 	%f1190, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1191, %f1190, %f1186;
	mov.f32 	%f1192, 0f3f800000;  	// 1
	add.ftz.f32 	%f1193, %f1191, %f1191;
	mov.f32 	%f1194, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1195, %f1194, %f1184;
	mul.ftz.f32 	%f1196, %f1193, %f1195;
	sub.ftz.f32 	%f1189, %f1192, %f1196;
$Lt_125_287234:
	.loc	22	477	0
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1197, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1197, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1198, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1199, %f586, %f599;
	sub.ftz.f32 	%f603, %f1198, %f1199;
	// Clamp the blend term to [0,1] and composite the first channel -> %f1178.
	mov.f32 	%f1200, 0f00000000;  	// 0
	max.ftz.f32 	%f1201, %f1189, %f1200;
	mov.f32 	%f1202, 0f3f800000;  	// 1
	min.ftz.f32 	%f1203, %f1201, %f1202;
	mul.ftz.f32 	%f1204, %f458, %f1203;
	fma.rn.ftz.f32 	%f1205, %f451, %f598, %f1204;
	mul.ftz.f32 	%f1206, %f600, %f1205;
	fma.rn.ftz.f32 	%f1178, %f455, %f603, %f1206;
	// --- second channel: a = %f1212 = clamp01(%f456), b = %f1214 = clamp01(%f452)
	.loc	22	373	0
	mov.f32 	%f1207, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1207;
	mov.f32 	%f1209, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1209;
	mov.f32 	%f1211, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1211;
	mov.f32 	%f1213, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1213;
	mov.f32 	%f1215, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p73, %f1212, %f1215;
	@!%p73 bra 	$Lt_125_288002;
	add.ftz.f32 	%f1216, %f1214, %f1214;
	mul.ftz.f32 	%f1217, %f1212, %f1216;
	bra.uni 	$Lt_125_287746;
$Lt_125_288002:
	mov.f32 	%f1218, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1219, %f1218, %f1214;
	mov.f32 	%f1220, 0f3f800000;  	// 1
	add.ftz.f32 	%f1221, %f1219, %f1219;
	mov.f32 	%f1222, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1223, %f1222, %f1212;
	mul.ftz.f32 	%f1224, %f1221, %f1223;
	sub.ftz.f32 	%f1217, %f1220, %f1224;
$Lt_125_287746:
	.loc	22	477	0
	// Clamp and composite the second channel -> %f1177.
	mov.f32 	%f1225, 0f00000000;  	// 0
	max.ftz.f32 	%f1226, %f1217, %f1225;
	mov.f32 	%f1227, 0f3f800000;  	// 1
	min.ftz.f32 	%f1228, %f1226, %f1227;
	mul.ftz.f32 	%f1229, %f458, %f1228;
	fma.rn.ftz.f32 	%f1230, %f452, %f598, %f1229;
	mul.ftz.f32 	%f1231, %f600, %f1230;
	fma.rn.ftz.f32 	%f1177, %f456, %f603, %f1231;
	// --- third channel: a = %f1237 = clamp01(%f457), b = %f1239 = clamp01(%f453)
	.loc	22	373	0
	mov.f32 	%f1232, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1232;
	mov.f32 	%f1234, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1234;
	mov.f32 	%f1236, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1236;
	mov.f32 	%f1238, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1238;
	mov.f32 	%f1240, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p74, %f1237, %f1240;
	@!%p74 bra 	$Lt_125_288514;
	add.ftz.f32 	%f1241, %f1239, %f1239;
	mul.ftz.f32 	%f1242, %f1237, %f1241;
	bra.uni 	$Lt_125_288258;
$Lt_125_288514:
	mov.f32 	%f1243, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1244, %f1243, %f1239;
	mov.f32 	%f1245, 0f3f800000;  	// 1
	add.ftz.f32 	%f1246, %f1244, %f1244;
	mov.f32 	%f1247, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1248, %f1247, %f1237;
	mul.ftz.f32 	%f1249, %f1246, %f1248;
	sub.ftz.f32 	%f1242, %f1245, %f1249;
$Lt_125_288258:
	.loc	22	477	0
	// Clamp and composite the third channel -> %f1176.
	mov.f32 	%f1250, 0f00000000;  	// 0
	max.ftz.f32 	%f1251, %f1242, %f1250;
	mov.f32 	%f1252, 0f3f800000;  	// 1
	min.ftz.f32 	%f1253, %f1251, %f1252;
	mul.ftz.f32 	%f1254, %f458, %f1253;
	fma.rn.ftz.f32 	%f1255, %f453, %f598, %f1254;
	mul.ftz.f32 	%f1256, %f600, %f1255;
	fma.rn.ftz.f32 	%f1176, %f457, %f603, %f1256;
$Lt_125_286722:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	202	0
	mov.f32 	%f451, %f1178;
	mov.f32 	%f452, %f1177;
	mov.f32 	%f453, %f1176;
	mov.f32 	%f454, %f1172;
	bra.uni 	$Lt_125_314370;
$Lt_125_3586:
	// Blend case (.loc file 22, line 478; per-channel helper at line 380).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*clamp01(blend))
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend (a = clamp01(B.c), b = clamp01(A.c), t = 2b - 1):
	//   b <= 0.5 ? a + t*(a - a*a) : a + t*(sqrt(a) - a)
	// NOTE(review): this has the shape of a "soft light" formula -- confirm
	// the mode name and layer roles against the CUDA source.
	.loc	22	478	0
	ld.param.f32 	%f1257, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1257, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1258, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1258;
	mov.f32 	%f1259, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1260, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1261, %f589, %f1260;
	mov.f32 	%f1262, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p75, %f1261, %f1262;
	@!%p75 bra 	$Lt_125_289026;
	mov.f32 	%f1263, 0f00000000;  	// 0
	mov.f32 	%f1264, 0f00000000;  	// 0
	mov.f32 	%f1265, 0f00000000;  	// 0
	mov.f32 	%f1259, 0f00000000;  	// 0
	bra.uni 	$Lt_125_288770;
$Lt_125_289026:
	// --- first channel: a = %f1184 = clamp01(%f455), b = %f1186 = clamp01(%f451),
	//     t = %f1272 = 2b - 1
	.loc	22	380	0
	mov.f32 	%f1266, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1266;
	mov.f32 	%f1267, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1267;
	mov.f32 	%f1268, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1268;
	mov.f32 	%f1269, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1269;
	add.ftz.f32 	%f1270, %f1186, %f1186;
	mov.f32 	%f1271, 0fbf800000;  	// -1
	add.ftz.f32 	%f1272, %f1270, %f1271;
	mov.f32 	%f1273, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p76, %f1186, %f1273;
	@!%p76 bra 	$Lt_125_289538;
	// b <= 0.5: blend = a + t*(a - a*a)
	mul.ftz.f32 	%f1274, %f1184, %f1184;
	sub.ftz.f32 	%f1275, %f1184, %f1274;
	fma.rn.ftz.f32 	%f1276, %f1272, %f1275, %f1184;
	bra.uni 	$Lt_125_289282;
$Lt_125_289538:
	// b > 0.5: blend = a + t*(sqrt(a) - a), using the approximate sqrt.
	sqrt.approx.ftz.f32 	%f1277, %f1184;
	sub.ftz.f32 	%f1278, %f1277, %f1184;
	fma.rn.ftz.f32 	%f1276, %f1272, %f1278, %f1184;
$Lt_125_289282:
	.loc	22	478	0
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1279, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1279, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1280, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1281, %f586, %f599;
	sub.ftz.f32 	%f603, %f1280, %f1281;
	// Clamp the blend term to [0,1] and composite the first channel -> %f1265.
	mov.f32 	%f1282, 0f00000000;  	// 0
	max.ftz.f32 	%f1283, %f1276, %f1282;
	mov.f32 	%f1284, 0f3f800000;  	// 1
	min.ftz.f32 	%f1285, %f1283, %f1284;
	mul.ftz.f32 	%f1286, %f458, %f1285;
	fma.rn.ftz.f32 	%f1287, %f451, %f598, %f1286;
	mul.ftz.f32 	%f1288, %f600, %f1287;
	fma.rn.ftz.f32 	%f1265, %f455, %f603, %f1288;
	// --- second channel: a = %f1212 = clamp01(%f456), b = %f1214 = clamp01(%f452),
	//     t = %f1295 = 2b - 1
	.loc	22	380	0
	mov.f32 	%f1289, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1289;
	mov.f32 	%f1290, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1290;
	mov.f32 	%f1291, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1291;
	mov.f32 	%f1292, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1292;
	add.ftz.f32 	%f1293, %f1214, %f1214;
	mov.f32 	%f1294, 0fbf800000;  	// -1
	add.ftz.f32 	%f1295, %f1293, %f1294;
	mov.f32 	%f1296, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p77, %f1214, %f1296;
	@!%p77 bra 	$Lt_125_290050;
	mul.ftz.f32 	%f1297, %f1212, %f1212;
	sub.ftz.f32 	%f1298, %f1212, %f1297;
	fma.rn.ftz.f32 	%f1299, %f1295, %f1298, %f1212;
	bra.uni 	$Lt_125_289794;
$Lt_125_290050:
	sqrt.approx.ftz.f32 	%f1300, %f1212;
	sub.ftz.f32 	%f1301, %f1300, %f1212;
	fma.rn.ftz.f32 	%f1299, %f1295, %f1301, %f1212;
$Lt_125_289794:
	.loc	22	478	0
	// Clamp and composite the second channel -> %f1264.
	mov.f32 	%f1302, 0f00000000;  	// 0
	max.ftz.f32 	%f1303, %f1299, %f1302;
	mov.f32 	%f1304, 0f3f800000;  	// 1
	min.ftz.f32 	%f1305, %f1303, %f1304;
	mul.ftz.f32 	%f1306, %f458, %f1305;
	fma.rn.ftz.f32 	%f1307, %f452, %f598, %f1306;
	mul.ftz.f32 	%f1308, %f600, %f1307;
	fma.rn.ftz.f32 	%f1264, %f456, %f603, %f1308;
	// --- third channel: a = %f1237 = clamp01(%f457), b = %f1239 = clamp01(%f453),
	//     t = %f1315 = 2b - 1
	.loc	22	380	0
	mov.f32 	%f1309, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1309;
	mov.f32 	%f1310, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1310;
	mov.f32 	%f1311, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1311;
	mov.f32 	%f1312, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1312;
	add.ftz.f32 	%f1313, %f1239, %f1239;
	mov.f32 	%f1314, 0fbf800000;  	// -1
	add.ftz.f32 	%f1315, %f1313, %f1314;
	mov.f32 	%f1316, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p78, %f1239, %f1316;
	@!%p78 bra 	$Lt_125_290562;
	mul.ftz.f32 	%f1317, %f1237, %f1237;
	sub.ftz.f32 	%f1318, %f1237, %f1317;
	fma.rn.ftz.f32 	%f1319, %f1315, %f1318, %f1237;
	bra.uni 	$Lt_125_290306;
$Lt_125_290562:
	sqrt.approx.ftz.f32 	%f1320, %f1237;
	sub.ftz.f32 	%f1321, %f1320, %f1237;
	fma.rn.ftz.f32 	%f1319, %f1315, %f1321, %f1237;
$Lt_125_290306:
	.loc	22	478	0
	// Clamp and composite the third channel -> %f1263.
	mov.f32 	%f1322, 0f00000000;  	// 0
	max.ftz.f32 	%f1323, %f1319, %f1322;
	mov.f32 	%f1324, 0f3f800000;  	// 1
	min.ftz.f32 	%f1325, %f1323, %f1324;
	mul.ftz.f32 	%f1326, %f458, %f1325;
	fma.rn.ftz.f32 	%f1327, %f453, %f598, %f1326;
	mul.ftz.f32 	%f1328, %f600, %f1327;
	fma.rn.ftz.f32 	%f1263, %f457, %f603, %f1328;
$Lt_125_288770:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	203	0
	mov.f32 	%f451, %f1265;
	mov.f32 	%f452, %f1264;
	mov.f32 	%f453, %f1263;
	mov.f32 	%f454, %f1259;
	bra.uni 	$Lt_125_314370;
$Lt_125_3842:
	// Blend case (.loc file 22, line 479; per-channel helper at line 386).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*clamp01(blend))
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend (a = clamp01(B.c), b = clamp01(A.c)):
	//   b <= 0.5 ? 2*b*a : 1 - 2*(1-b)*(1-a)
	// Same two formulas as the line-373 helper, but the branch is chosen by
	// A's channel instead of B's.
	// NOTE(review): if A is the top layer this is the usual "hard light"
	// formula -- confirm against the CUDA source.
	.loc	22	479	0
	ld.param.f32 	%f1329, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1329, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1330, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1330;
	mov.f32 	%f1331, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1332, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1333, %f589, %f1332;
	mov.f32 	%f1334, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p79, %f1333, %f1334;
	@!%p79 bra 	$Lt_125_291074;
	mov.f32 	%f1335, 0f00000000;  	// 0
	mov.f32 	%f1336, 0f00000000;  	// 0
	mov.f32 	%f1337, 0f00000000;  	// 0
	mov.f32 	%f1331, 0f00000000;  	// 0
	bra.uni 	$Lt_125_290818;
$Lt_125_291074:
	// --- first channel: a = %f1184 = clamp01(%f455), b = %f1186 = clamp01(%f451)
	.loc	22	386	0
	mov.f32 	%f1338, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1338;
	mov.f32 	%f1339, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1339;
	mov.f32 	%f1340, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1340;
	mov.f32 	%f1341, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1341;
	mov.f32 	%f1342, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p80, %f1186, %f1342;
	@!%p80 bra 	$Lt_125_291586;
	// b <= 0.5: blend = a * 2b
	add.ftz.f32 	%f1343, %f1186, %f1186;
	mul.ftz.f32 	%f1344, %f1184, %f1343;
	bra.uni 	$Lt_125_291330;
$Lt_125_291586:
	// b > 0.5: blend = 1 - 2*(1-b)*(1-a)
	mov.f32 	%f1345, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1346, %f1345, %f1186;
	mov.f32 	%f1347, 0f3f800000;  	// 1
	add.ftz.f32 	%f1348, %f1346, %f1346;
	mov.f32 	%f1349, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1350, %f1349, %f1184;
	mul.ftz.f32 	%f1351, %f1348, %f1350;
	sub.ftz.f32 	%f1344, %f1347, %f1351;
$Lt_125_291330:
	.loc	22	479	0
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1352, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1352, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1353, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1354, %f586, %f599;
	sub.ftz.f32 	%f603, %f1353, %f1354;
	// Clamp the blend term to [0,1] and composite the first channel -> %f1337.
	mov.f32 	%f1355, 0f00000000;  	// 0
	max.ftz.f32 	%f1356, %f1344, %f1355;
	mov.f32 	%f1357, 0f3f800000;  	// 1
	min.ftz.f32 	%f1358, %f1356, %f1357;
	mul.ftz.f32 	%f1359, %f458, %f1358;
	fma.rn.ftz.f32 	%f1360, %f451, %f598, %f1359;
	mul.ftz.f32 	%f1361, %f600, %f1360;
	fma.rn.ftz.f32 	%f1337, %f455, %f603, %f1361;
	// --- second channel: a = %f1212 = clamp01(%f456), b = %f1214 = clamp01(%f452)
	.loc	22	386	0
	mov.f32 	%f1362, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1362;
	mov.f32 	%f1363, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1363;
	mov.f32 	%f1364, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1364;
	mov.f32 	%f1365, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1365;
	mov.f32 	%f1366, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p81, %f1214, %f1366;
	@!%p81 bra 	$Lt_125_292098;
	add.ftz.f32 	%f1367, %f1214, %f1214;
	mul.ftz.f32 	%f1368, %f1212, %f1367;
	bra.uni 	$Lt_125_291842;
$Lt_125_292098:
	mov.f32 	%f1369, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1370, %f1369, %f1214;
	mov.f32 	%f1371, 0f3f800000;  	// 1
	add.ftz.f32 	%f1372, %f1370, %f1370;
	mov.f32 	%f1373, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1374, %f1373, %f1212;
	mul.ftz.f32 	%f1375, %f1372, %f1374;
	sub.ftz.f32 	%f1368, %f1371, %f1375;
$Lt_125_291842:
	.loc	22	479	0
	// Clamp and composite the second channel -> %f1336.
	mov.f32 	%f1376, 0f00000000;  	// 0
	max.ftz.f32 	%f1377, %f1368, %f1376;
	mov.f32 	%f1378, 0f3f800000;  	// 1
	min.ftz.f32 	%f1379, %f1377, %f1378;
	mul.ftz.f32 	%f1380, %f458, %f1379;
	fma.rn.ftz.f32 	%f1381, %f452, %f598, %f1380;
	mul.ftz.f32 	%f1382, %f600, %f1381;
	fma.rn.ftz.f32 	%f1336, %f456, %f603, %f1382;
	// --- third channel: a = %f1237 = clamp01(%f457), b = %f1239 = clamp01(%f453)
	.loc	22	386	0
	mov.f32 	%f1383, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1383;
	mov.f32 	%f1384, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1384;
	mov.f32 	%f1385, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1385;
	mov.f32 	%f1386, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1386;
	mov.f32 	%f1387, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p82, %f1239, %f1387;
	@!%p82 bra 	$Lt_125_292610;
	add.ftz.f32 	%f1388, %f1239, %f1239;
	mul.ftz.f32 	%f1389, %f1237, %f1388;
	bra.uni 	$Lt_125_292354;
$Lt_125_292610:
	mov.f32 	%f1390, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1391, %f1390, %f1239;
	mov.f32 	%f1392, 0f3f800000;  	// 1
	add.ftz.f32 	%f1393, %f1391, %f1391;
	mov.f32 	%f1394, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1395, %f1394, %f1237;
	mul.ftz.f32 	%f1396, %f1393, %f1395;
	sub.ftz.f32 	%f1389, %f1392, %f1396;
$Lt_125_292354:
	.loc	22	479	0
	// Clamp and composite the third channel -> %f1335.
	mov.f32 	%f1397, 0f00000000;  	// 0
	max.ftz.f32 	%f1398, %f1389, %f1397;
	mov.f32 	%f1399, 0f3f800000;  	// 1
	min.ftz.f32 	%f1400, %f1398, %f1399;
	mul.ftz.f32 	%f1401, %f458, %f1400;
	fma.rn.ftz.f32 	%f1402, %f453, %f598, %f1401;
	mul.ftz.f32 	%f1403, %f600, %f1402;
	fma.rn.ftz.f32 	%f1335, %f457, %f603, %f1403;
$Lt_125_290818:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	204	0
	mov.f32 	%f451, %f1337;
	mov.f32 	%f452, %f1336;
	mov.f32 	%f453, %f1335;
	mov.f32 	%f454, %f1331;
	bra.uni 	$Lt_125_314370;
$Lt_125_4098:
	// Blend case (.loc file 22, line 480; per-channel helper at lines 431-437).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*blend)
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend (a = clamp01(B.c), b = clamp(A.c, 1e-6, 0.999999)):
	//   b <= 0.5 ? clamp01(1 - (1-a)/(2b)) : clamp01(a / (2*(1-b)))
	// b is kept strictly inside (0,1), so neither divisor can be zero.
	// NOTE(review): this has the shape of a "vivid light" formula -- confirm
	// the mode name and layer roles against the CUDA source.
	.loc	22	480	0
	ld.param.f32 	%f1404, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1404, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1405, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1405;
	mov.f32 	%f1406, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1407, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1408, %f589, %f1407;
	mov.f32 	%f1409, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p83, %f1408, %f1409;
	@!%p83 bra 	$Lt_125_293122;
	mov.f32 	%f1410, 0f00000000;  	// 0
	mov.f32 	%f1411, 0f00000000;  	// 0
	mov.f32 	%f1412, 0f00000000;  	// 0
	mov.f32 	%f1406, 0f00000000;  	// 0
	bra.uni 	$Lt_125_292866;
$Lt_125_293122:
	// --- first channel: a = %f1184 = clamp01(%f455),
	//     b = %f1418 = clamp(%f451, 1e-6, 0.999999)
	.loc	22	431	0
	mov.f32 	%f1413, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1413;
	mov.f32 	%f1414, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1415, %f451, %f1414;
	mov.f32 	%f1416, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1416;
	mov.f32 	%f1417, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1418, %f1415, %f1417;
	mov.f32 	%f1419, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p84, %f1418, %f1419;
	@!%p84 bra 	$Lt_125_243202;
	.loc	22	433	0
	// b <= 0.5: blend = clamp01(1 - (1-a)/(2b)), using the approximate divide.
	mov.f32 	%f1420, 0f3f800000;  	// 1
	mov.f32 	%f1421, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1422, %f1421, %f1184;
	add.ftz.f32 	%f1423, %f1418, %f1418;
	div.approx.ftz.f32 	%f1424, %f1422, %f1423;
	sub.ftz.f32 	%f1425, %f1420, %f1424;
	mov.f32 	%f1426, 0f00000000;  	// 0
	max.ftz.f32 	%f1427, %f1425, %f1426;
	mov.f32 	%f1428, 0f3f800000;  	// 1
	min.ftz.f32 	%f1429, %f1427, %f1428;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_46;
$Lt_125_243202:
	.loc	22	437	0
	// b > 0.5: blend = clamp01(a / (2*(1-b)))
	mov.f32 	%f1430, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1431, %f1430, %f1418;
	add.ftz.f32 	%f1432, %f1431, %f1431;
	div.approx.ftz.f32 	%f1433, %f1184, %f1432;
	mov.f32 	%f1434, 0f00000000;  	// 0
	max.ftz.f32 	%f1435, %f1433, %f1434;
	mov.f32 	%f1436, 0f3f800000;  	// 1
	min.ftz.f32 	%f1429, %f1435, %f1436;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_46:
	.loc	22	480	0
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1437, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1437, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1438, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1439, %f586, %f599;
	sub.ftz.f32 	%f603, %f1438, %f1439;
	// Composite the first channel -> %f1412 (blend term is already clamped).
	mul.ftz.f32 	%f1440, %f1429, %f458;
	fma.rn.ftz.f32 	%f1441, %f451, %f598, %f1440;
	mul.ftz.f32 	%f1442, %f600, %f1441;
	fma.rn.ftz.f32 	%f1412, %f455, %f603, %f1442;
	// --- second channel: a = %f1212 = clamp01(%f456),
	//     b = %f1448 = clamp(%f452, 1e-6, 0.999999)
	.loc	22	431	0
	mov.f32 	%f1443, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1443;
	mov.f32 	%f1444, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1445, %f452, %f1444;
	mov.f32 	%f1446, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1446;
	mov.f32 	%f1447, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1448, %f1445, %f1447;
	mov.f32 	%f1449, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p85, %f1448, %f1449;
	@!%p85 bra 	$Lt_125_243458;
	.loc	22	433	0
	mov.f32 	%f1450, 0f3f800000;  	// 1
	mov.f32 	%f1451, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1452, %f1451, %f1212;
	add.ftz.f32 	%f1453, %f1448, %f1448;
	div.approx.ftz.f32 	%f1454, %f1452, %f1453;
	sub.ftz.f32 	%f1455, %f1450, %f1454;
	mov.f32 	%f1456, 0f00000000;  	// 0
	max.ftz.f32 	%f1457, %f1455, %f1456;
	mov.f32 	%f1458, 0f3f800000;  	// 1
	min.ftz.f32 	%f1459, %f1457, %f1458;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_44;
$Lt_125_243458:
	.loc	22	437	0
	mov.f32 	%f1460, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1461, %f1460, %f1448;
	add.ftz.f32 	%f1462, %f1461, %f1461;
	div.approx.ftz.f32 	%f1463, %f1212, %f1462;
	mov.f32 	%f1464, 0f00000000;  	// 0
	max.ftz.f32 	%f1465, %f1463, %f1464;
	mov.f32 	%f1466, 0f3f800000;  	// 1
	min.ftz.f32 	%f1459, %f1465, %f1466;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_44:
	.loc	22	480	0
	// Composite the second channel -> %f1411.
	mul.ftz.f32 	%f1467, %f1459, %f458;
	fma.rn.ftz.f32 	%f1468, %f452, %f598, %f1467;
	mul.ftz.f32 	%f1469, %f600, %f1468;
	fma.rn.ftz.f32 	%f1411, %f456, %f603, %f1469;
	// --- third channel: a = %f1237 = clamp01(%f457),
	//     b = %f1475 = clamp(%f453, 1e-6, 0.999999)
	.loc	22	431	0
	mov.f32 	%f1470, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1470;
	mov.f32 	%f1471, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1472, %f453, %f1471;
	mov.f32 	%f1473, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1473;
	mov.f32 	%f1474, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1475, %f1472, %f1474;
	mov.f32 	%f1476, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p86, %f1475, %f1476;
	@!%p86 bra 	$Lt_125_243714;
	.loc	22	433	0
	mov.f32 	%f1477, 0f3f800000;  	// 1
	mov.f32 	%f1478, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1479, %f1478, %f1237;
	add.ftz.f32 	%f1480, %f1475, %f1475;
	div.approx.ftz.f32 	%f1481, %f1479, %f1480;
	sub.ftz.f32 	%f1482, %f1477, %f1481;
	mov.f32 	%f1483, 0f00000000;  	// 0
	max.ftz.f32 	%f1484, %f1482, %f1483;
	mov.f32 	%f1485, 0f3f800000;  	// 1
	min.ftz.f32 	%f1486, %f1484, %f1485;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_42;
$Lt_125_243714:
	.loc	22	437	0
	mov.f32 	%f1487, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1488, %f1487, %f1475;
	add.ftz.f32 	%f1489, %f1488, %f1488;
	div.approx.ftz.f32 	%f1490, %f1237, %f1489;
	mov.f32 	%f1491, 0f00000000;  	// 0
	max.ftz.f32 	%f1492, %f1490, %f1491;
	mov.f32 	%f1493, 0f3f800000;  	// 1
	min.ftz.f32 	%f1486, %f1492, %f1493;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_42:
	.loc	22	480	0
	// Composite the third channel -> %f1410.
	mul.ftz.f32 	%f1494, %f1486, %f458;
	fma.rn.ftz.f32 	%f1495, %f453, %f598, %f1494;
	mul.ftz.f32 	%f1496, %f600, %f1495;
	fma.rn.ftz.f32 	%f1410, %f457, %f603, %f1496;
$Lt_125_292866:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	205	0
	mov.f32 	%f451, %f1412;
	mov.f32 	%f452, %f1411;
	mov.f32 	%f453, %f1410;
	mov.f32 	%f454, %f1406;
	bra.uni 	$Lt_125_314370;
$Lt_125_4354:
	// Blend case (.loc file 22, line 481).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*blend)
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend = 2*clamp01(A.c) + clamp01(B.c) - 1
	// Unlike the neighbouring cases, the blend term is NOT clamped to [0,1]
	// before compositing, so it can range over [-1, 2].
	// NOTE(review): this has the shape of a "linear light" formula -- confirm
	// the mode name, and whether the missing clamp is intended, in the source.
	.loc	22	481	0
	ld.param.f32 	%f1497, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1497, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1498, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1498;
	mov.f32 	%f1499, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1500, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1501, %f589, %f1500;
	mov.f32 	%f1502, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p87, %f1501, %f1502;
	@!%p87 bra 	$Lt_125_293634;
	mov.f32 	%f1503, 0f00000000;  	// 0
	mov.f32 	%f1504, 0f00000000;  	// 0
	mov.f32 	%f1505, 0f00000000;  	// 0
	mov.f32 	%f1499, 0f00000000;  	// 0
	bra.uni 	$Lt_125_293378;
$Lt_125_293634:
	// Shared factors, interleaved with the first channel's clamp of A:
	// %f598 = 1 - B.a, %f1186 = clamp01(%f451), %f599 ~= 1/alphaOut
	// (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1506, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1506, %f458;
	mov.f32 	%f1507, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1507;
	mov.f32 	%f1508, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1508;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1509, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1510, %f586, %f599;
	sub.ftz.f32 	%f603, %f1509, %f1510;
	// --- first channel: blend = 2*clamp01(%f451) + clamp01(%f455) - 1 -> %f1505
	add.ftz.f32 	%f1511, %f1186, %f1186;
	mov.f32 	%f1512, 0f00000000;  	// 0
	max.ftz.f32 	%f1513, %f455, %f1512;
	mov.f32 	%f1514, 0f3f800000;  	// 1
	min.ftz.f32 	%f1515, %f1513, %f1514;
	add.ftz.f32 	%f1516, %f1511, %f1515;
	mov.f32 	%f1517, 0fbf800000;  	// -1
	add.ftz.f32 	%f1518, %f1516, %f1517;
	mul.ftz.f32 	%f1519, %f458, %f1518;
	fma.rn.ftz.f32 	%f1520, %f451, %f598, %f1519;
	mul.ftz.f32 	%f1521, %f600, %f1520;
	fma.rn.ftz.f32 	%f1505, %f455, %f603, %f1521;
	// --- second channel: blend = 2*clamp01(%f452) + clamp01(%f456) - 1 -> %f1504
	mov.f32 	%f1522, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1522;
	mov.f32 	%f1523, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1523;
	add.ftz.f32 	%f1524, %f1214, %f1214;
	mov.f32 	%f1525, 0f00000000;  	// 0
	max.ftz.f32 	%f1526, %f456, %f1525;
	mov.f32 	%f1527, 0f3f800000;  	// 1
	min.ftz.f32 	%f1528, %f1526, %f1527;
	add.ftz.f32 	%f1529, %f1524, %f1528;
	mov.f32 	%f1530, 0fbf800000;  	// -1
	add.ftz.f32 	%f1531, %f1529, %f1530;
	mul.ftz.f32 	%f1532, %f458, %f1531;
	fma.rn.ftz.f32 	%f1533, %f452, %f598, %f1532;
	mul.ftz.f32 	%f1534, %f600, %f1533;
	fma.rn.ftz.f32 	%f1504, %f456, %f603, %f1534;
	// --- third channel: blend = 2*clamp01(%f453) + clamp01(%f457) - 1 -> %f1503
	mov.f32 	%f1535, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1535;
	mov.f32 	%f1536, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1536;
	add.ftz.f32 	%f1537, %f1239, %f1239;
	mov.f32 	%f1538, 0f00000000;  	// 0
	max.ftz.f32 	%f1539, %f457, %f1538;
	mov.f32 	%f1540, 0f3f800000;  	// 1
	min.ftz.f32 	%f1541, %f1539, %f1540;
	add.ftz.f32 	%f1542, %f1537, %f1541;
	mov.f32 	%f1543, 0fbf800000;  	// -1
	add.ftz.f32 	%f1544, %f1542, %f1543;
	mul.ftz.f32 	%f1545, %f458, %f1544;
	fma.rn.ftz.f32 	%f1546, %f453, %f598, %f1545;
	mul.ftz.f32 	%f1547, %f600, %f1546;
	fma.rn.ftz.f32 	%f1503, %f457, %f603, %f1547;
$Lt_125_293378:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	206	0
	mov.f32 	%f451, %f1505;
	mov.f32 	%f452, %f1504;
	mov.f32 	%f453, %f1503;
	mov.f32 	%f454, %f1499;
	bra.uni 	$Lt_125_314370;
$Lt_125_4610:
	// Blend case (.loc file 22, line 482; per-channel helper at lines 450-460).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*blend)
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend (a = clamp01(B.c), b = clamp01(A.c)):
	//   if (2b - 1 > a) 2b - 1;  else if (2b < a) 2b;  else a
	// NOTE(review): this has the shape of a "pin light" formula -- confirm
	// the mode name and layer roles against the CUDA source.
	.loc	22	482	0
	ld.param.f32 	%f1548, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1548, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1549, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1549;
	mov.f32 	%f1550, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1551, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1552, %f589, %f1551;
	mov.f32 	%f1553, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p88, %f1552, %f1553;
	@!%p88 bra 	$Lt_125_294146;
	mov.f32 	%f1554, 0f00000000;  	// 0
	mov.f32 	%f1555, 0f00000000;  	// 0
	mov.f32 	%f1556, 0f00000000;  	// 0
	mov.f32 	%f1550, 0f00000000;  	// 0
	bra.uni 	$Lt_125_293890;
$Lt_125_294146:
	// --- first channel: a = %f1184 = clamp01(%f455), b = %f1186 = clamp01(%f451),
	//     %f1561 = 2b, %f1563 = 2b - 1
	.loc	22	450	0
	mov.f32 	%f1557, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1557;
	mov.f32 	%f1558, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1558;
	mov.f32 	%f1559, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1559;
	mov.f32 	%f1560, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1560;
	add.ftz.f32 	%f1561, %f1186, %f1186;
	mov.f32 	%f1562, 0fbf800000;  	// -1
	add.ftz.f32 	%f1563, %f1561, %f1562;
	setp.gt.ftz.f32 	%p89, %f1563, %f1184;
	@!%p89 bra 	$Lt_125_244482;
	.loc	22	452	0
	// 2b - 1 > a: blend = 2b - 1
	mov.f32 	%f1564, %f1563;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_40;
$Lt_125_244482:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p90, %f1561, %f1184;
	@!%p90 bra 	$Lt_125_244738;
	.loc	22	456	0
	// 2b < a: blend = 2b
	mov.f32 	%f1564, %f1561;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_40;
$Lt_125_244738:
	.loc	22	460	0
	// otherwise: blend = a
	mov.f32 	%f1564, %f1184;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_40:
	.loc	22	482	0
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1565, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1565, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1566, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1567, %f586, %f599;
	sub.ftz.f32 	%f603, %f1566, %f1567;
	// Composite the first channel -> %f1556.
	mul.ftz.f32 	%f1568, %f1564, %f458;
	fma.rn.ftz.f32 	%f1569, %f451, %f598, %f1568;
	mul.ftz.f32 	%f1570, %f600, %f1569;
	fma.rn.ftz.f32 	%f1556, %f455, %f603, %f1570;
	// --- second channel: a = %f1212 = clamp01(%f456), b = %f1214 = clamp01(%f452),
	//     %f1575 = 2b, %f1577 = 2b - 1
	.loc	22	450	0
	mov.f32 	%f1571, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1571;
	mov.f32 	%f1572, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1572;
	mov.f32 	%f1573, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1573;
	mov.f32 	%f1574, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1574;
	add.ftz.f32 	%f1575, %f1214, %f1214;
	mov.f32 	%f1576, 0fbf800000;  	// -1
	add.ftz.f32 	%f1577, %f1575, %f1576;
	setp.gt.ftz.f32 	%p91, %f1577, %f1212;
	@!%p91 bra 	$Lt_125_244994;
	.loc	22	452	0
	mov.f32 	%f1578, %f1577;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_38;
$Lt_125_244994:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p92, %f1575, %f1212;
	@!%p92 bra 	$Lt_125_245250;
	.loc	22	456	0
	mov.f32 	%f1578, %f1575;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_38;
$Lt_125_245250:
	.loc	22	460	0
	mov.f32 	%f1578, %f1212;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_38:
	.loc	22	482	0
	// Composite the second channel -> %f1555.
	mul.ftz.f32 	%f1579, %f1578, %f458;
	fma.rn.ftz.f32 	%f1580, %f452, %f598, %f1579;
	mul.ftz.f32 	%f1581, %f600, %f1580;
	fma.rn.ftz.f32 	%f1555, %f456, %f603, %f1581;
	// --- third channel: a = %f1237 = clamp01(%f457), b = %f1239 = clamp01(%f453),
	//     %f1586 = 2b, %f1588 = 2b - 1
	.loc	22	450	0
	mov.f32 	%f1582, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1582;
	mov.f32 	%f1583, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1583;
	mov.f32 	%f1584, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1584;
	mov.f32 	%f1585, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1585;
	add.ftz.f32 	%f1586, %f1239, %f1239;
	mov.f32 	%f1587, 0fbf800000;  	// -1
	add.ftz.f32 	%f1588, %f1586, %f1587;
	setp.gt.ftz.f32 	%p93, %f1588, %f1237;
	@!%p93 bra 	$Lt_125_245506;
	.loc	22	452	0
	mov.f32 	%f1589, %f1588;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_36;
$Lt_125_245506:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p94, %f1586, %f1237;
	@!%p94 bra 	$Lt_125_245762;
	.loc	22	456	0
	mov.f32 	%f1589, %f1586;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__302_36;
$Lt_125_245762:
	.loc	22	460	0
	mov.f32 	%f1589, %f1237;
$LDWendi__Z5ClampIfET_S0_S0_S0__302_36:
	.loc	22	482	0
	// Composite the third channel -> %f1554.
	mul.ftz.f32 	%f1590, %f1589, %f458;
	fma.rn.ftz.f32 	%f1591, %f453, %f598, %f1590;
	mul.ftz.f32 	%f1592, %f600, %f1591;
	fma.rn.ftz.f32 	%f1554, %f457, %f603, %f1592;
$Lt_125_293890:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	207	0
	mov.f32 	%f451, %f1556;
	mov.f32 	%f452, %f1555;
	mov.f32 	%f453, %f1554;
	mov.f32 	%f454, %f1550;
	bra.uni 	$Lt_125_314370;
$Lt_125_4866:
	// Blend case (.loc file 22, line 483).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*blend)
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend = (A.c < 1 - B.c) ? 0 : 1   (inputs are used unclamped)
	// NOTE(review): this has the shape of a "hard mix" threshold -- confirm
	// the mode name and layer roles against the CUDA source.
	.loc	22	483	0
	ld.param.f32 	%f1593, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1593, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1594, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1594;
	mov.f32 	%f1595, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1596, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1597, %f589, %f1596;
	mov.f32 	%f1598, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p95, %f1597, %f1598;
	@!%p95 bra 	$Lt_125_294658;
	mov.f32 	%f1599, 0f00000000;  	// 0
	mov.f32 	%f1600, 0f00000000;  	// 0
	mov.f32 	%f1601, 0f00000000;  	// 0
	mov.f32 	%f1595, 0f00000000;  	// 0
	bra.uni 	$Lt_125_294402;
$Lt_125_294658:
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1602, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1602, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1603, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1604, %f586, %f599;
	sub.ftz.f32 	%f603, %f1603, %f1604;
	// --- first channel: selp yields 0 when %f451 < 1 - %f455, else 1 -> %f1601
	mov.f32 	%f1605, 0f00000000;  	// 0
	mov.f32 	%f1606, 0f3f800000;  	// 1
	mov.f32 	%f1607, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1608, %f1607, %f455;
	setp.lt.ftz.f32 	%p96, %f451, %f1608;
	selp.f32 	%f1609, %f1605, %f1606, %p96;
	mul.ftz.f32 	%f1610, %f1609, %f458;
	fma.rn.ftz.f32 	%f1611, %f451, %f598, %f1610;
	mul.ftz.f32 	%f1612, %f600, %f1611;
	fma.rn.ftz.f32 	%f1601, %f455, %f603, %f1612;
	// --- second channel: 0 when %f452 < 1 - %f456, else 1 -> %f1600
	mov.f32 	%f1613, 0f00000000;  	// 0
	mov.f32 	%f1614, 0f3f800000;  	// 1
	mov.f32 	%f1615, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1616, %f1615, %f456;
	setp.lt.ftz.f32 	%p97, %f452, %f1616;
	selp.f32 	%f1617, %f1613, %f1614, %p97;
	mul.ftz.f32 	%f1618, %f1617, %f458;
	fma.rn.ftz.f32 	%f1619, %f452, %f598, %f1618;
	mul.ftz.f32 	%f1620, %f600, %f1619;
	fma.rn.ftz.f32 	%f1600, %f456, %f603, %f1620;
	// --- third channel: 0 when %f453 < 1 - %f457, else 1 -> %f1599
	mov.f32 	%f1621, 0f00000000;  	// 0
	mov.f32 	%f1622, 0f3f800000;  	// 1
	mov.f32 	%f1623, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1624, %f1623, %f457;
	setp.lt.ftz.f32 	%p98, %f453, %f1624;
	selp.f32 	%f1625, %f1621, %f1622, %p98;
	mul.ftz.f32 	%f1626, %f1625, %f458;
	fma.rn.ftz.f32 	%f1627, %f453, %f598, %f1626;
	mul.ftz.f32 	%f1628, %f600, %f1627;
	fma.rn.ftz.f32 	%f1599, %f457, %f603, %f1628;
$Lt_125_294402:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	208	0
	mov.f32 	%f451, %f1601;
	mov.f32 	%f452, %f1600;
	mov.f32 	%f453, %f1599;
	mov.f32 	%f454, %f1595;
	bra.uni 	$Lt_125_314370;
$Lt_125_5122:
	// Blend case (.loc file 22, line 484).
	// Inputs: pixel A = %f451..%f453 with alpha %f454, pixel B = %f455..%f457
	// with alpha %f458. The result overwrites %f451..%f454, then control jumps
	// to the shared label $Lt_125_314370.
	// Per channel:
	//   out = B.c*(1-k) + k*(A.c*(1-B.a) + B.a*blend)
	//   k = a'/alphaOut, a' = alphaGain*A.a, alphaOut = a' + B.a - a'*B.a
	// blend = |A.c - B.c|   (inputs are used unclamped)
	// NOTE(review): this has the shape of a "difference" blend -- confirm the
	// mode name and layer roles against the CUDA source.
	.loc	22	484	0
	ld.param.f32 	%f1629, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1629, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1630, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1630;
	mov.f32 	%f1631, %f589;
	// Degenerate alpha guard: if (alphaOut - 8e-6) <= 0 emit all zeros and
	// skip the reciprocal below.
	mov.f32 	%f1632, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1633, %f589, %f1632;
	mov.f32 	%f1634, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p99, %f1633, %f1634;
	@!%p99 bra 	$Lt_125_295170;
	mov.f32 	%f1635, 0f00000000;  	// 0
	mov.f32 	%f1636, 0f00000000;  	// 0
	mov.f32 	%f1637, 0f00000000;  	// 0
	mov.f32 	%f1631, 0f00000000;  	// 0
	bra.uni 	$Lt_125_294914;
$Lt_125_295170:
	// Shared factors (reused by all three channels): %f598 = 1 - B.a,
	// %f599 ~= 1/alphaOut (approximate rcp), %f600 = k, %f603 = 1 - k.
	mov.f32 	%f1638, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1638, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1639, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1640, %f586, %f599;
	sub.ftz.f32 	%f603, %f1639, %f1640;
	// --- first channel: blend = |%f451 - %f455| -> %f1637
	sub.ftz.f32 	%f1641, %f451, %f455;
	abs.ftz.f32 	%f1642, %f1641;
	mul.ftz.f32 	%f1643, %f458, %f1642;
	fma.rn.ftz.f32 	%f1644, %f451, %f598, %f1643;
	mul.ftz.f32 	%f1645, %f600, %f1644;
	fma.rn.ftz.f32 	%f1637, %f455, %f603, %f1645;
	// --- second channel: blend = |%f452 - %f456| -> %f1636
	sub.ftz.f32 	%f1646, %f452, %f456;
	abs.ftz.f32 	%f1647, %f1646;
	mul.ftz.f32 	%f1648, %f458, %f1647;
	fma.rn.ftz.f32 	%f1649, %f452, %f598, %f1648;
	mul.ftz.f32 	%f1650, %f600, %f1649;
	fma.rn.ftz.f32 	%f1636, %f456, %f603, %f1650;
	// --- third channel: blend = |%f453 - %f457| -> %f1635
	sub.ftz.f32 	%f1651, %f453, %f457;
	abs.ftz.f32 	%f1652, %f1651;
	mul.ftz.f32 	%f1653, %f458, %f1652;
	fma.rn.ftz.f32 	%f1654, %f453, %f598, %f1653;
	mul.ftz.f32 	%f1655, %f600, %f1654;
	fma.rn.ftz.f32 	%f1635, %f457, %f603, %f1655;
$Lt_125_294914:
	// Write the three channels and alpha back into pixel A's registers.
	.loc	6	209	0
	mov.f32 	%f451, %f1637;
	mov.f32 	%f452, %f1636;
	mov.f32 	%f453, %f1635;
	mov.f32 	%f454, %f1631;
	bra.uni 	$Lt_125_314370;
$Lt_125_5378:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// For each of three channels it computes clamp01(a + b - 2*a*b) from the
	// clamped inputs a = %f451/%f452/%f453 and b = %f455/%f456/%f457, then
	// combines it with both inputs using the weights alphaGain * %f454 and %f458.
	// NOTE(review): presumably %f451..%f454 and %f455..%f458 are two RGBA
	// pixels; their loads are not visible in this chunk -- confirm.
	.loc	22	485	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f1656, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1656, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1657, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1657;
	mov.f32 	%f1658, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero and the blend is skipped.
	mov.f32 	%f1659, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1660, %f589, %f1659;
	mov.f32 	%f1661, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p100, %f1660, %f1661;
	@!%p100 bra 	$Lt_125_295682;
	mov.f32 	%f1662, 0f00000000;  	// 0
	mov.f32 	%f1663, 0f00000000;  	// 0
	mov.f32 	%f1664, 0f00000000;  	// 0
	mov.f32 	%f1658, 0f00000000;  	// 0
	bra.uni 	$Lt_125_295426;
$Lt_125_295682:
	// %f598 = 1 - %f458
	mov.f32 	%f1665, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1665, %f458;
	// Channel 1 inputs clamped to [0, 1]: %f1184 = clamp01(%f455), %f1186 = clamp01(%f451)
	mov.f32 	%f1666, 0f00000000;  	// 0
	max.ftz.f32 	%f1180, %f455, %f1666;
	mov.f32 	%f1667, 0f00000000;  	// 0
	max.ftz.f32 	%f1182, %f451, %f1667;
	mov.f32 	%f1668, 0f3f800000;  	// 1
	min.ftz.f32 	%f1184, %f1180, %f1668;
	mov.f32 	%f1669, 0f3f800000;  	// 1
	min.ftz.f32 	%f1186, %f1182, %f1669;
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1670, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1671, %f586, %f599;
	sub.ftz.f32 	%f603, %f1670, %f1671;
	// Channel 1 blend: %f1679 = clamp01(a + b - 2*a*b)
	add.ftz.f32 	%f1672, %f1186, %f1184;
	add.ftz.f32 	%f1673, %f1186, %f1186;
	mul.ftz.f32 	%f1674, %f1184, %f1673;
	sub.ftz.f32 	%f1675, %f1672, %f1674;
	mov.f32 	%f1676, 0f00000000;  	// 0
	max.ftz.f32 	%f1677, %f1675, %f1676;
	mov.f32 	%f1678, 0f3f800000;  	// 1
	min.ftz.f32 	%f1679, %f1677, %f1678;
	// Channel 1 result: %f1664 = %f455 * %f603 + %f600 * (%f451 * %f598 + %f458 * %f1679)
	mul.ftz.f32 	%f1680, %f458, %f1679;
	fma.rn.ftz.f32 	%f1681, %f451, %f598, %f1680;
	mul.ftz.f32 	%f1682, %f600, %f1681;
	fma.rn.ftz.f32 	%f1664, %f455, %f603, %f1682;
	// Channel 2: same computation on %f452 / %f456, result in %f1663
	mov.f32 	%f1683, 0f00000000;  	// 0
	max.ftz.f32 	%f1208, %f456, %f1683;
	mov.f32 	%f1684, 0f00000000;  	// 0
	max.ftz.f32 	%f1210, %f452, %f1684;
	mov.f32 	%f1685, 0f3f800000;  	// 1
	min.ftz.f32 	%f1212, %f1208, %f1685;
	mov.f32 	%f1686, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1210, %f1686;
	add.ftz.f32 	%f1687, %f1214, %f1212;
	add.ftz.f32 	%f1688, %f1214, %f1214;
	mul.ftz.f32 	%f1689, %f1212, %f1688;
	sub.ftz.f32 	%f1690, %f1687, %f1689;
	mov.f32 	%f1691, 0f00000000;  	// 0
	max.ftz.f32 	%f1692, %f1690, %f1691;
	mov.f32 	%f1693, 0f3f800000;  	// 1
	min.ftz.f32 	%f1694, %f1692, %f1693;
	mul.ftz.f32 	%f1695, %f458, %f1694;
	fma.rn.ftz.f32 	%f1696, %f452, %f598, %f1695;
	mul.ftz.f32 	%f1697, %f600, %f1696;
	fma.rn.ftz.f32 	%f1663, %f456, %f603, %f1697;
	// Channel 3: same computation on %f453 / %f457, result in %f1662
	mov.f32 	%f1698, 0f00000000;  	// 0
	max.ftz.f32 	%f1233, %f457, %f1698;
	mov.f32 	%f1699, 0f00000000;  	// 0
	max.ftz.f32 	%f1235, %f453, %f1699;
	mov.f32 	%f1700, 0f3f800000;  	// 1
	min.ftz.f32 	%f1237, %f1233, %f1700;
	mov.f32 	%f1701, 0f3f800000;  	// 1
	min.ftz.f32 	%f1239, %f1235, %f1701;
	add.ftz.f32 	%f1702, %f1239, %f1237;
	add.ftz.f32 	%f1703, %f1239, %f1239;
	mul.ftz.f32 	%f1704, %f1237, %f1703;
	sub.ftz.f32 	%f1705, %f1702, %f1704;
	mov.f32 	%f1706, 0f00000000;  	// 0
	max.ftz.f32 	%f1707, %f1705, %f1706;
	mov.f32 	%f1708, 0f3f800000;  	// 1
	min.ftz.f32 	%f1709, %f1707, %f1708;
	mul.ftz.f32 	%f1710, %f458, %f1709;
	fma.rn.ftz.f32 	%f1711, %f453, %f598, %f1710;
	mul.ftz.f32 	%f1712, %f600, %f1711;
	fma.rn.ftz.f32 	%f1662, %f457, %f603, %f1712;
$Lt_125_295426:
	// Join point: store the four results back into %f451..%f454 and jump to
	// $Lt_125_314370 (defined outside this chunk).
	.loc	6	210	0
	mov.f32 	%f451, %f1664;
	mov.f32 	%f452, %f1663;
	mov.f32 	%f453, %f1662;
	mov.f32 	%f454, %f1658;
	bra.uni 	$Lt_125_314370;
$Lt_125_5634:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// For each of three channels it computes
	// clamp01(clamp01(b) - clamp01(a)) with a = %f451/%f452/%f453 and
	// b = %f455/%f456/%f457, then combines it with both inputs using the
	// weights alphaGain * %f454 and %f458.
	// NOTE(review): presumably %f451..%f454 and %f455..%f458 are two RGBA
	// pixels; their loads are not visible in this chunk -- confirm.
	.loc	22	486	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f1713, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1713, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1714, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1714;
	mov.f32 	%f1715, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero and the blend is skipped.
	mov.f32 	%f1716, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1717, %f589, %f1716;
	mov.f32 	%f1718, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p101, %f1717, %f1718;
	@!%p101 bra 	$Lt_125_296194;
	mov.f32 	%f1719, 0f00000000;  	// 0
	mov.f32 	%f1720, 0f00000000;  	// 0
	mov.f32 	%f1721, 0f00000000;  	// 0
	mov.f32 	%f1715, 0f00000000;  	// 0
	bra.uni 	$Lt_125_295938;
$Lt_125_296194:
	// %f598 = 1 - %f458
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	mov.f32 	%f1722, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1722, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1723, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1724, %f586, %f599;
	sub.ftz.f32 	%f603, %f1723, %f1724;
	// Channel 1 blend: %f1737 = clamp01(clamp01(%f455) - clamp01(%f451))
	mov.f32 	%f1725, 0f00000000;  	// 0
	max.ftz.f32 	%f1726, %f455, %f1725;
	mov.f32 	%f1727, 0f3f800000;  	// 1
	min.ftz.f32 	%f1728, %f1726, %f1727;
	mov.f32 	%f1729, 0f00000000;  	// 0
	max.ftz.f32 	%f1730, %f451, %f1729;
	mov.f32 	%f1731, 0f3f800000;  	// 1
	min.ftz.f32 	%f1732, %f1730, %f1731;
	sub.ftz.f32 	%f1733, %f1728, %f1732;
	mov.f32 	%f1734, 0f00000000;  	// 0
	max.ftz.f32 	%f1735, %f1733, %f1734;
	mov.f32 	%f1736, 0f3f800000;  	// 1
	min.ftz.f32 	%f1737, %f1735, %f1736;
	// Channel 1 result: %f1721 = %f455 * %f603 + %f600 * (%f451 * %f598 + %f458 * %f1737)
	mul.ftz.f32 	%f1738, %f458, %f1737;
	fma.rn.ftz.f32 	%f1739, %f451, %f598, %f1738;
	mul.ftz.f32 	%f1740, %f600, %f1739;
	fma.rn.ftz.f32 	%f1721, %f455, %f603, %f1740;
	// Channel 2: same computation on %f452 / %f456, result in %f1720
	mov.f32 	%f1741, 0f00000000;  	// 0
	max.ftz.f32 	%f1742, %f456, %f1741;
	mov.f32 	%f1743, 0f3f800000;  	// 1
	min.ftz.f32 	%f1744, %f1742, %f1743;
	mov.f32 	%f1745, 0f00000000;  	// 0
	max.ftz.f32 	%f1746, %f452, %f1745;
	mov.f32 	%f1747, 0f3f800000;  	// 1
	min.ftz.f32 	%f1748, %f1746, %f1747;
	sub.ftz.f32 	%f1749, %f1744, %f1748;
	mov.f32 	%f1750, 0f00000000;  	// 0
	max.ftz.f32 	%f1751, %f1749, %f1750;
	mov.f32 	%f1752, 0f3f800000;  	// 1
	min.ftz.f32 	%f1753, %f1751, %f1752;
	mul.ftz.f32 	%f1754, %f458, %f1753;
	fma.rn.ftz.f32 	%f1755, %f452, %f598, %f1754;
	mul.ftz.f32 	%f1756, %f600, %f1755;
	fma.rn.ftz.f32 	%f1720, %f456, %f603, %f1756;
	// Channel 3: same computation on %f453 / %f457, result in %f1719
	mov.f32 	%f1757, 0f00000000;  	// 0
	max.ftz.f32 	%f1758, %f457, %f1757;
	mov.f32 	%f1759, 0f3f800000;  	// 1
	min.ftz.f32 	%f1760, %f1758, %f1759;
	mov.f32 	%f1761, 0f00000000;  	// 0
	max.ftz.f32 	%f1762, %f453, %f1761;
	mov.f32 	%f1763, 0f3f800000;  	// 1
	min.ftz.f32 	%f1764, %f1762, %f1763;
	sub.ftz.f32 	%f1765, %f1760, %f1764;
	mov.f32 	%f1766, 0f00000000;  	// 0
	max.ftz.f32 	%f1767, %f1765, %f1766;
	mov.f32 	%f1768, 0f3f800000;  	// 1
	min.ftz.f32 	%f1769, %f1767, %f1768;
	mul.ftz.f32 	%f1770, %f458, %f1769;
	fma.rn.ftz.f32 	%f1771, %f453, %f598, %f1770;
	mul.ftz.f32 	%f1772, %f600, %f1771;
	fma.rn.ftz.f32 	%f1719, %f457, %f603, %f1772;
$Lt_125_295938:
	// Join point: store the four results back into %f451..%f454 and jump to
	// $Lt_125_314370 (defined outside this chunk).
	.loc	6	211	0
	mov.f32 	%f451, %f1721;
	mov.f32 	%f452, %f1720;
	mov.f32 	%f453, %f1719;
	mov.f32 	%f454, %f1715;
	bra.uni 	$Lt_125_314370;
$Lt_125_5890:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// For each of three channels it computes
	// clamp01(clamp01(b) / clamp(a, 1e-7, 1)) with a = %f451/%f452/%f453 and
	// b = %f455/%f456/%f457, then combines it with both inputs using the
	// weights alphaGain * %f454 and %f458.
	// NOTE(review): presumably %f451..%f454 and %f455..%f458 are two RGBA
	// pixels; their loads are not visible in this chunk -- confirm.
	.loc	22	487	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f1773, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1773, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1774, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1774;
	mov.f32 	%f1775, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero and the blend is skipped.
	mov.f32 	%f1776, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1777, %f589, %f1776;
	mov.f32 	%f1778, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p102, %f1777, %f1778;
	@!%p102 bra 	$Lt_125_296706;
	mov.f32 	%f1779, 0f00000000;  	// 0
	mov.f32 	%f1780, 0f00000000;  	// 0
	mov.f32 	%f1781, 0f00000000;  	// 0
	mov.f32 	%f1775, 0f00000000;  	// 0
	bra.uni 	$Lt_125_296450;
$Lt_125_296706:
	// %f598 = 1 - %f458
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	mov.f32 	%f1782, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1782, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1783, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1784, %f586, %f599;
	sub.ftz.f32 	%f603, %f1783, %f1784;
	// Channel 1 blend: numerator clamp01(%f455); the divisor is %f451 clamped
	// to [1e-7, 1], so the approximate divide never sees a zero divisor.
	mov.f32 	%f1785, 0f00000000;  	// 0
	max.ftz.f32 	%f1786, %f455, %f1785;
	mov.f32 	%f1787, 0f3f800000;  	// 1
	min.ftz.f32 	%f1788, %f1786, %f1787;
	mov.f32 	%f1789, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1790, %f451, %f1789;
	mov.f32 	%f1791, 0f3f800000;  	// 1
	min.ftz.f32 	%f1792, %f1790, %f1791;
	div.approx.ftz.f32 	%f1793, %f1788, %f1792;
	mov.f32 	%f1794, 0f00000000;  	// 0
	max.ftz.f32 	%f1795, %f1793, %f1794;
	mov.f32 	%f1796, 0f3f800000;  	// 1
	min.ftz.f32 	%f1797, %f1795, %f1796;
	// Channel 1 result: %f1781 = %f455 * %f603 + %f600 * (%f451 * %f598 + %f458 * %f1797)
	mul.ftz.f32 	%f1798, %f458, %f1797;
	fma.rn.ftz.f32 	%f1799, %f451, %f598, %f1798;
	mul.ftz.f32 	%f1800, %f600, %f1799;
	fma.rn.ftz.f32 	%f1781, %f455, %f603, %f1800;
	// Channel 2: same computation on %f452 / %f456, result in %f1780
	mov.f32 	%f1801, 0f00000000;  	// 0
	max.ftz.f32 	%f1802, %f456, %f1801;
	mov.f32 	%f1803, 0f3f800000;  	// 1
	min.ftz.f32 	%f1804, %f1802, %f1803;
	mov.f32 	%f1805, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1806, %f452, %f1805;
	mov.f32 	%f1807, 0f3f800000;  	// 1
	min.ftz.f32 	%f1808, %f1806, %f1807;
	div.approx.ftz.f32 	%f1809, %f1804, %f1808;
	mov.f32 	%f1810, 0f00000000;  	// 0
	max.ftz.f32 	%f1811, %f1809, %f1810;
	mov.f32 	%f1812, 0f3f800000;  	// 1
	min.ftz.f32 	%f1813, %f1811, %f1812;
	mul.ftz.f32 	%f1814, %f458, %f1813;
	fma.rn.ftz.f32 	%f1815, %f452, %f598, %f1814;
	mul.ftz.f32 	%f1816, %f600, %f1815;
	fma.rn.ftz.f32 	%f1780, %f456, %f603, %f1816;
	// Channel 3: same computation on %f453 / %f457, result in %f1779
	mov.f32 	%f1817, 0f00000000;  	// 0
	max.ftz.f32 	%f1818, %f457, %f1817;
	mov.f32 	%f1819, 0f3f800000;  	// 1
	min.ftz.f32 	%f1820, %f1818, %f1819;
	mov.f32 	%f1821, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1822, %f453, %f1821;
	mov.f32 	%f1823, 0f3f800000;  	// 1
	min.ftz.f32 	%f1824, %f1822, %f1823;
	div.approx.ftz.f32 	%f1825, %f1820, %f1824;
	mov.f32 	%f1826, 0f00000000;  	// 0
	max.ftz.f32 	%f1827, %f1825, %f1826;
	mov.f32 	%f1828, 0f3f800000;  	// 1
	min.ftz.f32 	%f1829, %f1827, %f1828;
	mul.ftz.f32 	%f1830, %f458, %f1829;
	fma.rn.ftz.f32 	%f1831, %f453, %f598, %f1830;
	mul.ftz.f32 	%f1832, %f600, %f1831;
	fma.rn.ftz.f32 	%f1779, %f457, %f603, %f1832;
$Lt_125_296450:
	// Join point: store the four results back into %f451..%f454 and jump to
	// $Lt_125_314370 (defined outside this chunk).
	.loc	6	212	0
	mov.f32 	%f451, %f1781;
	mov.f32 	%f452, %f1780;
	mov.f32 	%f453, %f1779;
	mov.f32 	%f454, %f1775;
	bra.uni 	$Lt_125_314370;
$Lt_125_6146:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// This part finds the max (%f1835) and min (%f1836) of %f451, %f452, %f453
	// and, depending on which register holds the max and the min, produces
	// three values %f1846, %f1847, %f1851 paired with %f451, %f452, %f453:
	//   slot of the max = s
	//   slot of the mid = s * (mid - min) / (max - min)
	//   slot of the min = 0
	// where s = sat(max(%f455,%f456,%f457) - min(%f455,%f456,%f457)).
	// When the max-greater-than-min test of a branch fails, all three are 0.
	// Execution falls through to the label that follows this block.
	// NOTE(review): this has the shape of the SetSat helper used by
	// non-separable blend modes -- confirm against the original source.
	.loc	22	154	0
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	// %p105: the min equals %f453
	setp.eq.ftz.f32 	%p105, %f1836, %f453;
	@!%p105 bra 	$Lt_125_297218;
	// min is %f453; %p106: the max equals %f452
	setp.eq.ftz.f32 	%p106, %f1835, %f452;
	@!%p106 bra 	$Lt_125_297730;
	setp.lt.ftz.f32 	%p107, %f453, %f452;
	@!%p107 bra 	$Lt_125_298242;
	.loc	22	161	0
	// max = %f452, mid = %f451, min = %f453
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1843, %f451, %f453;
	mul.ftz.f32 	%f1844, %f1842, %f1843;
	sub.ftz.f32 	%f1845, %f452, %f453;
	div.approx.ftz.f32 	%f1846, %f1844, %f1845;
	.loc	22	162	0
	mov.f32 	%f1847, %f1842;
	bra.uni 	$Lt_125_298498;
$Lt_125_298242:
	.loc	22	166	0
	mov.f32 	%f1846, 0f00000000;  	// 0
	mov.f32 	%f1847, 0f00000000;  	// 0
	bra.uni 	$Lt_125_298498;
$Lt_125_297730:
	// min is %f453 and the max is not %f452, so this branch treats %f451 as max
	setp.lt.ftz.f32 	%p110, %f453, %f451;
	@!%p110 bra 	$Lt_125_298754;
	.loc	22	173	0
	// max = %f451, mid = %f452, min = %f453
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1848, %f452, %f453;
	mul.ftz.f32 	%f1849, %f1842, %f1848;
	sub.ftz.f32 	%f1850, %f451, %f453;
	div.approx.ftz.f32 	%f1847, %f1849, %f1850;
	.loc	22	174	0
	mov.f32 	%f1846, %f1842;
	bra.uni 	$Lt_125_298498;
$Lt_125_298754:
	.loc	22	178	0
	mov.f32 	%f1846, 0f00000000;  	// 0
	mov.f32 	%f1847, 0f00000000;  	// 0
$Lt_125_298498:
$Lt_125_297474:
	// The min slot (%f453 -> %f1851) is always 0 in the two branches above
	mov.f32 	%f1851, 0f00000000;  	// 0
	bra.uni 	$Lt_125_301058;
$Lt_125_297218:
	// min is not %f453. %p111: min equals %f452; %p112: max equals %f453
	setp.eq.ftz.f32 	%p111, %f1836, %f452;
	setp.eq.ftz.f32 	%p112, %f1835, %f453;
	@!%p112 bra 	$Lt_125_299266;
	@!%p111 bra 	$Lt_125_299778;
	setp.gt.ftz.f32 	%p113, %f453, %f452;
	@!%p113 bra 	$Lt_125_300290;
	.loc	22	191	0
	// max = %f453, mid = %f451, min = %f452
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1852, %f451, %f452;
	mul.ftz.f32 	%f1853, %f1842, %f1852;
	sub.ftz.f32 	%f1854, %f453, %f452;
	div.approx.ftz.f32 	%f1846, %f1853, %f1854;
	.loc	22	192	0
	mov.f32 	%f1851, %f1842;
	bra.uni 	$Lt_125_300034;
$Lt_125_300290:
	.loc	22	196	0
	mov.f32 	%f1846, 0f00000000;  	// 0
	mov.f32 	%f1851, 0f00000000;  	// 0
$Lt_125_300034:
	mov.f32 	%f1847, 0f00000000;  	// 0
	bra.uni 	$Lt_125_301058;
$Lt_125_299778:
	// max is %f453 and the min is neither %f453 nor %f452, so %f451 is treated as min
	setp.gt.ftz.f32 	%p114, %f453, %f451;
	@!%p114 bra 	$Lt_125_300802;
	.loc	22	204	0
	// max = %f453, mid = %f452, min = %f451
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1855, %f452, %f451;
	mul.ftz.f32 	%f1856, %f1842, %f1855;
	sub.ftz.f32 	%f1857, %f453, %f451;
	div.approx.ftz.f32 	%f1847, %f1856, %f1857;
	.loc	22	205	0
	mov.f32 	%f1851, %f1842;
	bra.uni 	$Lt_125_300546;
$Lt_125_300802:
	.loc	22	209	0
	mov.f32 	%f1851, 0f00000000;  	// 0
	mov.f32 	%f1847, 0f00000000;  	// 0
$Lt_125_300546:
	.loc	22	211	0
	mov.f32 	%f1846, 0f00000000;  	// 0
	bra.uni 	$Lt_125_301058;
$Lt_125_299266:
	// Neither the max nor the min is %f453, so %f453 is treated as the mid value
	@!%p111 bra 	$Lt_125_301314;
	setp.lt.ftz.f32 	%p115, %f452, %f451;
	@!%p115 bra 	$Lt_125_301826;
	.loc	22	220	0
	// max = %f451, mid = %f453, min = %f452
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1858, %f453, %f452;
	mul.ftz.f32 	%f1859, %f1842, %f1858;
	sub.ftz.f32 	%f1860, %f451, %f452;
	div.approx.ftz.f32 	%f1851, %f1859, %f1860;
	.loc	22	221	0
	mov.f32 	%f1846, %f1842;
	bra.uni 	$Lt_125_301570;
$Lt_125_301826:
	.loc	22	225	0
	mov.f32 	%f1846, 0f00000000;  	// 0
	mov.f32 	%f1851, 0f00000000;  	// 0
$Lt_125_301570:
	mov.f32 	%f1847, 0f00000000;  	// 0
	bra.uni 	$Lt_125_301058;
$Lt_125_301314:
	// Remaining ordering; %p103 (%f452 > %f451) is reused as the max > min test
	@!%p103 bra 	$Lt_125_302338;
	.loc	22	233	0
	// max = %f452, mid = %f453, min = %f451
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	sub.ftz.f32 	%f1841, %f1838, %f1840;
	cvt.ftz.sat.f32.f32 	%f1842, %f1841;
	sub.ftz.f32 	%f1861, %f453, %f451;
	mul.ftz.f32 	%f1862, %f1842, %f1861;
	sub.ftz.f32 	%f1863, %f452, %f451;
	div.approx.ftz.f32 	%f1851, %f1862, %f1863;
	.loc	22	234	0
	mov.f32 	%f1847, %f1842;
	bra.uni 	$Lt_125_302082;
$Lt_125_302338:
	.loc	22	238	0
	mov.f32 	%f1851, 0f00000000;  	// 0
	mov.f32 	%f1847, 0f00000000;  	// 0
$Lt_125_302082:
	.loc	22	240	0
	mov.f32 	%f1846, 0f00000000;  	// 0
$Lt_125_301058:
$Lt_125_299010:
$Lt_125_296962:
	// Entered with three channel values in %f1846, %f1847, %f1851.
	// Shifts them so that their weighted sum matches the weighted sum of
	// %f455, %f456, %f457, then pulls the result back into range if any
	// shifted channel is below 0 or above 1. Outputs: %f1874, %f1876, %f1878.
	// The weights are read from constant table kRGB32f_To_601YPbPr at byte
	// offsets +8 (first channel), +4 (second) and +0 (third).
	// NOTE(review): the table name suggests BT.601 luma weights, and the
	// sequence resembles the SetLum / ClipColor helpers of non-separable
	// blend modes -- confirm against the original source.
	.loc	22	113	0
	// %f1870 = sat(weighted sum of %f455,%f456,%f457)
	// %f1871 = sat(weighted sum of %f1846,%f1847,%f1851)
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1864, %f456, %f922;
	mul.ftz.f32 	%f1865, %f1847, %f922;
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1866, %f921, %f457, %f1864;
	fma.rn.ftz.f32 	%f1867, %f921, %f1851, %f1865;
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1868, %f920, %f455, %f1866;
	fma.rn.ftz.f32 	%f1869, %f920, %f1846, %f1867;
	cvt.ftz.sat.f32.f32 	%f1870, %f1868;
	cvt.ftz.sat.f32.f32 	%f1871, %f1869;
	// Add the difference %f1872 = %f1870 - %f1871 to every channel; the
	// mov copies (%f1874, %f1876, %f1878) are the values updated below.
	sub.ftz.f32 	%f1872, %f1870, %f1871;
	add.ftz.f32 	%f1873, %f1872, %f1846;
	mov.f32 	%f1874, %f1873;
	add.ftz.f32 	%f1875, %f1872, %f1847;
	mov.f32 	%f1876, %f1875;
	add.ftz.f32 	%f1877, %f1872, %f1851;
	mov.f32 	%f1878, %f1877;
	.loc	22	50	0
	// L = %f1882 = sat(weighted sum of the shifted channels)
	mul.ftz.f32 	%f1879, %f1875, %f922;
	fma.rn.ftz.f32 	%f1880, %f921, %f1877, %f1879;
	fma.rn.ftz.f32 	%f1881, %f920, %f1873, %f1880;
	cvt.ftz.sat.f32.f32 	%f1882, %f1881;
	.loc	22	116	0
	// %f1884 = min of the three shifted channels; only if it is negative
	// is each channel c replaced by L + L * (c - L) / (L - min).
	setp.gt.ftz.f32 	%p116, %f1875, %f1873;
	selp.f32 	%f1883, %f1873, %f1875, %p116;
	setp.lt.ftz.f32 	%p117, %f1883, %f1877;
	selp.f32 	%f1884, %f1883, %f1877, %p117;
	mov.f32 	%f1885, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p118, %f1884, %f1885;
	@!%p118 bra 	$Lt_125_302594;
	.loc	22	119	0
	sub.ftz.f32 	%f1886, %f1882, %f1884;
	sub.ftz.f32 	%f1887, %f1877, %f1882;
	mul.ftz.f32 	%f1888, %f1882, %f1887;
	div.approx.ftz.f32 	%f1889, %f1888, %f1886;
	add.ftz.f32 	%f1878, %f1882, %f1889;
	.loc	22	120	0
	sub.ftz.f32 	%f1890, %f1875, %f1882;
	mul.ftz.f32 	%f1891, %f1882, %f1890;
	div.approx.ftz.f32 	%f1892, %f1891, %f1886;
	add.ftz.f32 	%f1876, %f1882, %f1892;
	.loc	22	121	0
	sub.ftz.f32 	%f1893, %f1873, %f1882;
	mul.ftz.f32 	%f1894, %f1882, %f1893;
	div.approx.ftz.f32 	%f1895, %f1894, %f1886;
	add.ftz.f32 	%f1874, %f1882, %f1895;
$Lt_125_302594:
	// %f1897 = max of the three shifted channels (taken before the update
	// above); only if it exceeds 1 is each current channel c replaced by
	// L + (1 - L) * (c - L) / (max - L).
	max.ftz.f32 	%f1896, %f1875, %f1873;
	max.ftz.f32 	%f1897, %f1896, %f1877;
	mov.f32 	%f1898, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p119, %f1897, %f1898;
	@!%p119 bra 	$Lt_125_303106;
	.loc	27	529	0
	mov.f32 	%f1899, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1900, %f1899, %f1882;
	sub.ftz.f32 	%f1901, %f1897, %f1882;
	sub.ftz.f32 	%f1902, %f1878, %f1882;
	mul.ftz.f32 	%f1903, %f1900, %f1902;
	div.approx.ftz.f32 	%f1904, %f1903, %f1901;
	.loc	22	125	0
	add.ftz.f32 	%f1878, %f1904, %f1882;
	.loc	27	529	0
	sub.ftz.f32 	%f1905, %f1876, %f1882;
	mul.ftz.f32 	%f1906, %f1900, %f1905;
	div.approx.ftz.f32 	%f1907, %f1906, %f1901;
	.loc	22	126	0
	add.ftz.f32 	%f1876, %f1907, %f1882;
	.loc	27	529	0
	sub.ftz.f32 	%f1908, %f1874, %f1882;
	mul.ftz.f32 	%f1909, %f1900, %f1908;
	div.approx.ftz.f32 	%f1910, %f1909, %f1901;
	.loc	22	127	0
	add.ftz.f32 	%f1874, %f1910, %f1882;
$Lt_125_303106:
	// Entered with three blended channel values in %f1874, %f1876, %f1878.
	// Combines them with %f455, %f456, %f457 using the weights
	// alphaGain * %f454 and %f458, writes the results to %f451..%f454 and
	// jumps to $Lt_125_314370 (defined outside this chunk).
	.loc	22	468	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f1911, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f1911, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f1912, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f1912;
	mov.f32 	%f1913, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero.
	mov.f32 	%f1914, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1915, %f589, %f1914;
	mov.f32 	%f1916, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p120, %f1915, %f1916;
	@!%p120 bra 	$Lt_125_303874;
	mov.f32 	%f1917, 0f00000000;  	// 0
	mov.f32 	%f1918, 0f00000000;  	// 0
	mov.f32 	%f1919, 0f00000000;  	// 0
	mov.f32 	%f1913, 0f00000000;  	// 0
	bra.uni 	$Lt_125_303618;
$Lt_125_303874:
	// %f598 = 1 - %f458
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	mov.f32 	%f1920, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f1920, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f1921, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1922, %f586, %f599;
	sub.ftz.f32 	%f603, %f1921, %f1922;
	// Per channel: out = %f45x * %f603 + %f600 * (x * %f598 + x * %f458),
	// where x is the blended value for that channel.
	// NOTE(review): both inner terms use the same x, so the inner sum is
	// algebraically x * ((1 - %f458) + %f458); the original %f451..%f453
	// values do not enter it. Confirm this is what the source intends.
	mul.ftz.f32 	%f1923, %f598, %f1874;
	fma.rn.ftz.f32 	%f1924, %f1874, %f458, %f1923;
	mul.ftz.f32 	%f1925, %f600, %f1924;
	fma.rn.ftz.f32 	%f1919, %f455, %f603, %f1925;
	mul.ftz.f32 	%f1926, %f598, %f1876;
	fma.rn.ftz.f32 	%f1927, %f1876, %f458, %f1926;
	mul.ftz.f32 	%f1928, %f600, %f1927;
	fma.rn.ftz.f32 	%f1918, %f456, %f603, %f1928;
	mul.ftz.f32 	%f1929, %f598, %f1878;
	fma.rn.ftz.f32 	%f1930, %f1878, %f458, %f1929;
	mul.ftz.f32 	%f1931, %f600, %f1930;
	fma.rn.ftz.f32 	%f1917, %f457, %f603, %f1931;
$Lt_125_303618:
	// Join point: store the four results back into %f451..%f454.
	.loc	6	213	0
	mov.f32 	%f451, %f1919;
	mov.f32 	%f452, %f1918;
	mov.f32 	%f453, %f1917;
	mov.f32 	%f454, %f1913;
	bra.uni 	$Lt_125_314370;
$Lt_125_6402:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// This part finds the max (%f1838) and min (%f1840) of %f455, %f456, %f457
	// and, depending on which register holds the max and the min, produces
	// three values %f1937, %f1938, %f1942 paired with %f455, %f456, %f457:
	//   slot of the max = s
	//   slot of the mid = s * (mid - min) / (max - min)
	//   slot of the min = 0
	// where s = sat(max(%f451,%f452,%f453) - min(%f451,%f452,%f453)).
	// When the max-greater-than-min test of a branch fails, all three are 0.
	// Execution falls through to the label that follows this block.
	// NOTE(review): this has the shape of the SetSat helper used by
	// non-separable blend modes -- confirm against the original source.
	.loc	22	154	0
	max.ftz.f32 	%f1837, %f455, %f456;
	setp.lt.ftz.f32 	%p108, %f455, %f456;
	max.ftz.f32 	%f1838, %f1837, %f457;
	selp.f32 	%f1839, %f455, %f456, %p108;
	setp.lt.ftz.f32 	%p109, %f1839, %f457;
	selp.f32 	%f1840, %f1839, %f457, %p109;
	// %p121: the min equals %f457
	setp.eq.ftz.f32 	%p121, %f1840, %f457;
	@!%p121 bra 	$Lt_125_304386;
	// min is %f457; %p122: the max equals %f456
	setp.eq.ftz.f32 	%p122, %f1838, %f456;
	@!%p122 bra 	$Lt_125_304898;
	setp.gt.ftz.f32 	%p123, %f456, %f457;
	@!%p123 bra 	$Lt_125_305410;
	.loc	22	161	0
	// max = %f456, mid = %f455, min = %f457
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1934, %f455, %f457;
	mul.ftz.f32 	%f1935, %f1933, %f1934;
	sub.ftz.f32 	%f1936, %f456, %f457;
	div.approx.ftz.f32 	%f1937, %f1935, %f1936;
	.loc	22	162	0
	mov.f32 	%f1938, %f1933;
	bra.uni 	$Lt_125_305666;
$Lt_125_305410:
	.loc	22	166	0
	mov.f32 	%f1937, 0f00000000;  	// 0
	mov.f32 	%f1938, 0f00000000;  	// 0
	bra.uni 	$Lt_125_305666;
$Lt_125_304898:
	// min is %f457 and the max is not %f456, so this branch treats %f455 as max
	setp.gt.ftz.f32 	%p124, %f455, %f457;
	@!%p124 bra 	$Lt_125_305922;
	.loc	22	173	0
	// max = %f455, mid = %f456, min = %f457
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1939, %f456, %f457;
	mul.ftz.f32 	%f1940, %f1933, %f1939;
	sub.ftz.f32 	%f1941, %f455, %f457;
	div.approx.ftz.f32 	%f1938, %f1940, %f1941;
	.loc	22	174	0
	mov.f32 	%f1937, %f1933;
	bra.uni 	$Lt_125_305666;
$Lt_125_305922:
	.loc	22	178	0
	mov.f32 	%f1937, 0f00000000;  	// 0
	mov.f32 	%f1938, 0f00000000;  	// 0
$Lt_125_305666:
$Lt_125_304642:
	// The min slot (%f457 -> %f1942) is always 0 in the two branches above
	mov.f32 	%f1942, 0f00000000;  	// 0
	bra.uni 	$Lt_125_308226;
$Lt_125_304386:
	// min is not %f457. %p125: min equals %f456; %p126: max equals %f457
	setp.eq.ftz.f32 	%p125, %f1840, %f456;
	setp.eq.ftz.f32 	%p126, %f1838, %f457;
	@!%p126 bra 	$Lt_125_306434;
	@!%p125 bra 	$Lt_125_306946;
	setp.lt.ftz.f32 	%p127, %f456, %f457;
	@!%p127 bra 	$Lt_125_307458;
	.loc	22	191	0
	// max = %f457, mid = %f455, min = %f456
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1943, %f455, %f456;
	mul.ftz.f32 	%f1944, %f1933, %f1943;
	sub.ftz.f32 	%f1945, %f457, %f456;
	div.approx.ftz.f32 	%f1937, %f1944, %f1945;
	.loc	22	192	0
	mov.f32 	%f1942, %f1933;
	bra.uni 	$Lt_125_307202;
$Lt_125_307458:
	.loc	22	196	0
	mov.f32 	%f1937, 0f00000000;  	// 0
	mov.f32 	%f1942, 0f00000000;  	// 0
$Lt_125_307202:
	mov.f32 	%f1938, 0f00000000;  	// 0
	bra.uni 	$Lt_125_308226;
$Lt_125_306946:
	// max is %f457 and the min is neither %f457 nor %f456, so %f455 is treated as min
	setp.lt.ftz.f32 	%p128, %f455, %f457;
	@!%p128 bra 	$Lt_125_307970;
	.loc	22	204	0
	// max = %f457, mid = %f456, min = %f455
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1946, %f456, %f455;
	mul.ftz.f32 	%f1947, %f1933, %f1946;
	sub.ftz.f32 	%f1948, %f457, %f455;
	div.approx.ftz.f32 	%f1938, %f1947, %f1948;
	.loc	22	205	0
	mov.f32 	%f1942, %f1933;
	bra.uni 	$Lt_125_307714;
$Lt_125_307970:
	.loc	22	209	0
	mov.f32 	%f1942, 0f00000000;  	// 0
	mov.f32 	%f1938, 0f00000000;  	// 0
$Lt_125_307714:
	.loc	22	211	0
	mov.f32 	%f1937, 0f00000000;  	// 0
	bra.uni 	$Lt_125_308226;
$Lt_125_306434:
	// Neither the max nor the min is %f457, so %f457 is treated as the mid value
	@!%p125 bra 	$Lt_125_308482;
	setp.gt.ftz.f32 	%p129, %f455, %f456;
	@!%p129 bra 	$Lt_125_308994;
	.loc	22	220	0
	// max = %f455, mid = %f457, min = %f456
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1949, %f457, %f456;
	mul.ftz.f32 	%f1950, %f1933, %f1949;
	sub.ftz.f32 	%f1951, %f455, %f456;
	div.approx.ftz.f32 	%f1942, %f1950, %f1951;
	.loc	22	221	0
	mov.f32 	%f1937, %f1933;
	bra.uni 	$Lt_125_308738;
$Lt_125_308994:
	.loc	22	225	0
	mov.f32 	%f1937, 0f00000000;  	// 0
	mov.f32 	%f1942, 0f00000000;  	// 0
$Lt_125_308738:
	mov.f32 	%f1938, 0f00000000;  	// 0
	bra.uni 	$Lt_125_308226;
$Lt_125_308482:
	// Remaining ordering; %p108 (%f455 < %f456) is reused as the max > min test
	@!%p108 bra 	$Lt_125_309506;
	.loc	22	233	0
	// max = %f456, mid = %f457, min = %f455
	setp.gt.ftz.f32 	%p103, %f452, %f451;
	max.ftz.f32 	%f1833, %f452, %f451;
	selp.f32 	%f1834, %f451, %f452, %p103;
	max.ftz.f32 	%f1835, %f1833, %f453;
	setp.lt.ftz.f32 	%p104, %f1834, %f453;
	selp.f32 	%f1836, %f1834, %f453, %p104;
	sub.ftz.f32 	%f1932, %f1835, %f1836;
	cvt.ftz.sat.f32.f32 	%f1933, %f1932;
	sub.ftz.f32 	%f1952, %f457, %f455;
	mul.ftz.f32 	%f1953, %f1933, %f1952;
	sub.ftz.f32 	%f1954, %f456, %f455;
	div.approx.ftz.f32 	%f1942, %f1953, %f1954;
	.loc	22	234	0
	mov.f32 	%f1938, %f1933;
	bra.uni 	$Lt_125_309250;
$Lt_125_309506:
	.loc	22	238	0
	mov.f32 	%f1942, 0f00000000;  	// 0
	mov.f32 	%f1938, 0f00000000;  	// 0
$Lt_125_309250:
	.loc	22	240	0
	mov.f32 	%f1937, 0f00000000;  	// 0
$Lt_125_308226:
$Lt_125_306178:
$Lt_125_304130:
	// Entered with three channel values in %f1937, %f1938, %f1942.
	// Shifts them so that their weighted sum matches the weighted sum of
	// %f455, %f456, %f457, then pulls the result back into range if any
	// shifted channel is below 0 or above 1. Outputs: %f1965, %f1967, %f1969.
	// The weights are read from constant table kRGB32f_To_601YPbPr at byte
	// offsets +8 (first channel), +4 (second) and +0 (third).
	// NOTE(review): the table name suggests BT.601 luma weights, and the
	// sequence resembles the SetLum / ClipColor helpers of non-separable
	// blend modes -- confirm against the original source.
	.loc	22	113	0
	// %f1961 = sat(weighted sum of %f455,%f456,%f457)
	// %f1962 = sat(weighted sum of %f1937,%f1938,%f1942)
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1955, %f456, %f922;
	mul.ftz.f32 	%f1956, %f1938, %f922;
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1957, %f921, %f457, %f1955;
	fma.rn.ftz.f32 	%f1958, %f921, %f1942, %f1956;
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1959, %f920, %f455, %f1957;
	fma.rn.ftz.f32 	%f1960, %f920, %f1937, %f1958;
	cvt.ftz.sat.f32.f32 	%f1961, %f1959;
	cvt.ftz.sat.f32.f32 	%f1962, %f1960;
	// Add the difference %f1963 = %f1961 - %f1962 to every channel; the
	// mov copies (%f1965, %f1967, %f1969) are the values updated below.
	sub.ftz.f32 	%f1963, %f1961, %f1962;
	add.ftz.f32 	%f1964, %f1963, %f1937;
	mov.f32 	%f1965, %f1964;
	add.ftz.f32 	%f1966, %f1963, %f1938;
	mov.f32 	%f1967, %f1966;
	add.ftz.f32 	%f1968, %f1963, %f1942;
	mov.f32 	%f1969, %f1968;
	.loc	22	50	0
	// L = %f1973 = sat(weighted sum of the shifted channels)
	mul.ftz.f32 	%f1970, %f1966, %f922;
	fma.rn.ftz.f32 	%f1971, %f921, %f1968, %f1970;
	fma.rn.ftz.f32 	%f1972, %f920, %f1964, %f1971;
	cvt.ftz.sat.f32.f32 	%f1973, %f1972;
	.loc	22	116	0
	// %f1975 = min of the three shifted channels; only if it is negative
	// is each channel c replaced by L + L * (c - L) / (L - min).
	setp.gt.ftz.f32 	%p130, %f1966, %f1964;
	selp.f32 	%f1974, %f1964, %f1966, %p130;
	setp.lt.ftz.f32 	%p131, %f1974, %f1968;
	selp.f32 	%f1975, %f1974, %f1968, %p131;
	mov.f32 	%f1976, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p132, %f1975, %f1976;
	@!%p132 bra 	$Lt_125_309762;
	.loc	22	119	0
	sub.ftz.f32 	%f1977, %f1973, %f1975;
	sub.ftz.f32 	%f1978, %f1968, %f1973;
	mul.ftz.f32 	%f1979, %f1973, %f1978;
	div.approx.ftz.f32 	%f1980, %f1979, %f1977;
	add.ftz.f32 	%f1969, %f1973, %f1980;
	.loc	22	120	0
	sub.ftz.f32 	%f1981, %f1966, %f1973;
	mul.ftz.f32 	%f1982, %f1973, %f1981;
	div.approx.ftz.f32 	%f1983, %f1982, %f1977;
	add.ftz.f32 	%f1967, %f1973, %f1983;
	.loc	22	121	0
	sub.ftz.f32 	%f1984, %f1964, %f1973;
	mul.ftz.f32 	%f1985, %f1973, %f1984;
	div.approx.ftz.f32 	%f1986, %f1985, %f1977;
	add.ftz.f32 	%f1965, %f1973, %f1986;
$Lt_125_309762:
	// %f1988 = max of the three shifted channels (taken before the update
	// above); only if it exceeds 1 is each current channel c replaced by
	// L + (1 - L) * (c - L) / (max - L).
	max.ftz.f32 	%f1987, %f1966, %f1964;
	max.ftz.f32 	%f1988, %f1987, %f1968;
	mov.f32 	%f1989, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p133, %f1988, %f1989;
	@!%p133 bra 	$Lt_125_310274;
	.loc	27	529	0
	mov.f32 	%f1990, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1991, %f1990, %f1973;
	sub.ftz.f32 	%f1992, %f1988, %f1973;
	sub.ftz.f32 	%f1993, %f1969, %f1973;
	mul.ftz.f32 	%f1994, %f1991, %f1993;
	div.approx.ftz.f32 	%f1995, %f1994, %f1992;
	.loc	22	125	0
	add.ftz.f32 	%f1969, %f1995, %f1973;
	.loc	27	529	0
	sub.ftz.f32 	%f1996, %f1967, %f1973;
	mul.ftz.f32 	%f1997, %f1991, %f1996;
	div.approx.ftz.f32 	%f1998, %f1997, %f1992;
	.loc	22	126	0
	add.ftz.f32 	%f1967, %f1998, %f1973;
	.loc	27	529	0
	sub.ftz.f32 	%f1999, %f1965, %f1973;
	mul.ftz.f32 	%f2000, %f1991, %f1999;
	div.approx.ftz.f32 	%f2001, %f2000, %f1992;
	.loc	22	127	0
	add.ftz.f32 	%f1965, %f2001, %f1973;
$Lt_125_310274:
	// Entered with three blended channel values in %f1965, %f1967, %f1969.
	// Combines them with %f455, %f456, %f457 using the weights
	// alphaGain * %f454 and %f458, writes the results to %f451..%f454 and
	// jumps to $Lt_125_314370 (defined outside this chunk).
	.loc	22	468	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f2002, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f2002, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f2003, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f2003;
	mov.f32 	%f2004, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero.
	mov.f32 	%f2005, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f2006, %f589, %f2005;
	mov.f32 	%f2007, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p134, %f2006, %f2007;
	@!%p134 bra 	$Lt_125_311042;
	mov.f32 	%f2008, 0f00000000;  	// 0
	mov.f32 	%f2009, 0f00000000;  	// 0
	mov.f32 	%f2010, 0f00000000;  	// 0
	mov.f32 	%f2004, 0f00000000;  	// 0
	bra.uni 	$Lt_125_310786;
$Lt_125_311042:
	// %f598 = 1 - %f458
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	mov.f32 	%f2011, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f2011, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f2012, 0f3f800000;  	// 1
	mul.ftz.f32 	%f2013, %f586, %f599;
	sub.ftz.f32 	%f603, %f2012, %f2013;
	// Per channel: out = %f45x * %f603 + %f600 * (x * %f598 + x * %f458),
	// where x is the blended value for that channel.
	// NOTE(review): both inner terms use the same x, so the inner sum is
	// algebraically x * ((1 - %f458) + %f458); the original %f451..%f453
	// values do not enter it. Confirm this is what the source intends.
	mul.ftz.f32 	%f2014, %f598, %f1965;
	fma.rn.ftz.f32 	%f2015, %f1965, %f458, %f2014;
	mul.ftz.f32 	%f2016, %f600, %f2015;
	fma.rn.ftz.f32 	%f2010, %f455, %f603, %f2016;
	mul.ftz.f32 	%f2017, %f598, %f1967;
	fma.rn.ftz.f32 	%f2018, %f1967, %f458, %f2017;
	mul.ftz.f32 	%f2019, %f600, %f2018;
	fma.rn.ftz.f32 	%f2009, %f456, %f603, %f2019;
	mul.ftz.f32 	%f2020, %f598, %f1969;
	fma.rn.ftz.f32 	%f2021, %f1969, %f458, %f2020;
	mul.ftz.f32 	%f2022, %f600, %f2021;
	fma.rn.ftz.f32 	%f2008, %f457, %f603, %f2022;
$Lt_125_310786:
	// Join point: store the four results back into %f451..%f454.
	.loc	6	214	0
	mov.f32 	%f451, %f2010;
	mov.f32 	%f452, %f2009;
	mov.f32 	%f453, %f2008;
	mov.f32 	%f454, %f2004;
	bra.uni 	$Lt_125_314370;
$Lt_125_6658:
	// Branch-target case; the dispatch that jumps here is outside this chunk.
	// Shifts %f451, %f452, %f453 so that their weighted sum matches the
	// weighted sum of %f455, %f456, %f457, then pulls the result back into
	// range if any shifted channel is below 0 or above 1.
	// Outputs: %f2033, %f2035, %f2037. Execution falls through to the label
	// that follows this block.
	// The weights are read from constant table kRGB32f_To_601YPbPr at byte
	// offsets +8 (first channel), +4 (second) and +0 (third).
	// NOTE(review): the table name suggests BT.601 luma weights, and the
	// sequence resembles the SetLum / ClipColor helpers of non-separable
	// blend modes -- confirm against the original source.
	.loc	22	113	0
	// %f2029 = sat(weighted sum of %f451,%f452,%f453)
	// %f2030 = sat(weighted sum of %f455,%f456,%f457)
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f2023, %f922, %f452;
	mul.ftz.f32 	%f2024, %f456, %f922;
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f2025, %f921, %f453, %f2023;
	fma.rn.ftz.f32 	%f2026, %f921, %f457, %f2024;
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f2027, %f920, %f451, %f2025;
	fma.rn.ftz.f32 	%f2028, %f920, %f455, %f2026;
	cvt.ftz.sat.f32.f32 	%f2029, %f2027;
	cvt.ftz.sat.f32.f32 	%f2030, %f2028;
	// Add the difference %f2031 = %f2030 - %f2029 to %f451..%f453; the
	// mov copies (%f2033, %f2035, %f2037) are the values updated below.
	sub.ftz.f32 	%f2031, %f2030, %f2029;
	add.ftz.f32 	%f2032, %f2031, %f451;
	mov.f32 	%f2033, %f2032;
	add.ftz.f32 	%f2034, %f2031, %f452;
	mov.f32 	%f2035, %f2034;
	add.ftz.f32 	%f2036, %f2031, %f453;
	mov.f32 	%f2037, %f2036;
	.loc	22	50	0
	// L = %f2041 = sat(weighted sum of the shifted channels)
	mul.ftz.f32 	%f2038, %f2034, %f922;
	fma.rn.ftz.f32 	%f2039, %f921, %f2036, %f2038;
	fma.rn.ftz.f32 	%f2040, %f920, %f2032, %f2039;
	cvt.ftz.sat.f32.f32 	%f2041, %f2040;
	.loc	22	116	0
	// %f2043 = min of the three shifted channels; only if it is negative
	// is each channel c replaced by L + L * (c - L) / (L - min).
	setp.gt.ftz.f32 	%p135, %f2034, %f2032;
	selp.f32 	%f2042, %f2032, %f2034, %p135;
	setp.lt.ftz.f32 	%p136, %f2042, %f2036;
	selp.f32 	%f2043, %f2042, %f2036, %p136;
	mov.f32 	%f2044, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p137, %f2043, %f2044;
	@!%p137 bra 	$Lt_125_311298;
	.loc	22	119	0
	sub.ftz.f32 	%f2045, %f2041, %f2043;
	sub.ftz.f32 	%f2046, %f2036, %f2041;
	mul.ftz.f32 	%f2047, %f2041, %f2046;
	div.approx.ftz.f32 	%f2048, %f2047, %f2045;
	add.ftz.f32 	%f2037, %f2041, %f2048;
	.loc	22	120	0
	sub.ftz.f32 	%f2049, %f2034, %f2041;
	mul.ftz.f32 	%f2050, %f2041, %f2049;
	div.approx.ftz.f32 	%f2051, %f2050, %f2045;
	add.ftz.f32 	%f2035, %f2041, %f2051;
	.loc	22	121	0
	sub.ftz.f32 	%f2052, %f2032, %f2041;
	mul.ftz.f32 	%f2053, %f2041, %f2052;
	div.approx.ftz.f32 	%f2054, %f2053, %f2045;
	add.ftz.f32 	%f2033, %f2041, %f2054;
$Lt_125_311298:
	// %f2056 = max of the three shifted channels (taken before the update
	// above); only if it exceeds 1 is each current channel c replaced by
	// L + (1 - L) * (c - L) / (max - L).
	max.ftz.f32 	%f2055, %f2034, %f2032;
	max.ftz.f32 	%f2056, %f2055, %f2036;
	mov.f32 	%f2057, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p138, %f2056, %f2057;
	@!%p138 bra 	$Lt_125_311810;
	.loc	27	529	0
	mov.f32 	%f2058, 0f3f800000;  	// 1
	sub.ftz.f32 	%f2059, %f2058, %f2041;
	sub.ftz.f32 	%f2060, %f2056, %f2041;
	sub.ftz.f32 	%f2061, %f2037, %f2041;
	mul.ftz.f32 	%f2062, %f2059, %f2061;
	div.approx.ftz.f32 	%f2063, %f2062, %f2060;
	.loc	22	125	0
	add.ftz.f32 	%f2037, %f2063, %f2041;
	.loc	27	529	0
	sub.ftz.f32 	%f2064, %f2035, %f2041;
	mul.ftz.f32 	%f2065, %f2059, %f2064;
	div.approx.ftz.f32 	%f2066, %f2065, %f2060;
	.loc	22	126	0
	add.ftz.f32 	%f2035, %f2066, %f2041;
	.loc	27	529	0
	sub.ftz.f32 	%f2067, %f2033, %f2041;
	mul.ftz.f32 	%f2068, %f2059, %f2067;
	div.approx.ftz.f32 	%f2069, %f2068, %f2060;
	.loc	22	127	0
	add.ftz.f32 	%f2033, %f2069, %f2041;
$Lt_125_311810:
	// Entered with three blended channel values in %f2033, %f2035, %f2037.
	// Combines them with %f455, %f456, %f457 using the weights
	// alphaGain * %f454 and %f458, writes the results to %f451..%f454 and
	// jumps to $Lt_125_314370 (defined outside this chunk).
	.loc	22	468	0
	// %f586 = alphaGain * %f454
	// %f589 = %f586 + %f458 - %f586 * %f458   (combined weight, also output 4)
	ld.param.f32 	%f2070, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f2070, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f2071, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f2071;
	mov.f32 	%f2072, %f589;
	// Guard for the reciprocal below: if %f589 - 8e-6 <= 0, all four
	// outputs are forced to zero.
	mov.f32 	%f2073, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f2074, %f589, %f2073;
	mov.f32 	%f2075, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p139, %f2074, %f2075;
	@!%p139 bra 	$Lt_125_312578;
	mov.f32 	%f2076, 0f00000000;  	// 0
	mov.f32 	%f2077, 0f00000000;  	// 0
	mov.f32 	%f2078, 0f00000000;  	// 0
	mov.f32 	%f2072, 0f00000000;  	// 0
	bra.uni 	$Lt_125_312322;
$Lt_125_312578:
	// %f598 = 1 - %f458
	// %f600 = %f586 / %f589 (approximate reciprocal), %f603 = 1 - %f586 / %f589
	mov.f32 	%f2079, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f2079, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f2080, 0f3f800000;  	// 1
	mul.ftz.f32 	%f2081, %f586, %f599;
	sub.ftz.f32 	%f603, %f2080, %f2081;
	// Per channel: out = %f45x * %f603 + %f600 * (x * %f598 + x * %f458),
	// where x is the blended value for that channel.
	// NOTE(review): both inner terms use the same x, so the inner sum is
	// algebraically x * ((1 - %f458) + %f458). Confirm this is what the
	// source intends.
	mul.ftz.f32 	%f2082, %f598, %f2033;
	fma.rn.ftz.f32 	%f2083, %f2033, %f458, %f2082;
	mul.ftz.f32 	%f2084, %f600, %f2083;
	fma.rn.ftz.f32 	%f2078, %f455, %f603, %f2084;
	mul.ftz.f32 	%f2085, %f598, %f2035;
	fma.rn.ftz.f32 	%f2086, %f2035, %f458, %f2085;
	mul.ftz.f32 	%f2087, %f600, %f2086;
	fma.rn.ftz.f32 	%f2077, %f456, %f603, %f2087;
	mul.ftz.f32 	%f2088, %f598, %f2037;
	fma.rn.ftz.f32 	%f2089, %f2037, %f458, %f2088;
	mul.ftz.f32 	%f2090, %f600, %f2089;
	fma.rn.ftz.f32 	%f2076, %f457, %f603, %f2090;
$Lt_125_312322:
	// Join point: store the four results back into %f451..%f454.
	.loc	6	215	0
	mov.f32 	%f451, %f2078;
	mov.f32 	%f452, %f2077;
	mov.f32 	%f453, %f2076;
	mov.f32 	%f454, %f2072;
	bra.uni 	$Lt_125_314370;
$Lt_125_6914:
	// Blend case that transfers luma from (%f451,%f452,%f453) onto
	// (%f455,%f456,%f457), clips the result into range, then composites it.
	// NOTE(review): the shape matches the W3C "luminosity" non-separable blend
	// (SetLum + ClipColor) -- confirm against the CUDA source.
	//
	// Luma weights come from the constant table kRGB32f_To_601YPbPr:
	//   luma(c) = [+8]*c0 + [+0]*c2 + [+4]*c1
	.loc	22	113	0
	ld.const.f32 	%f922, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f2091, %f922, %f452;
	mul.ftz.f32 	%f2092, %f456, %f922;
	ld.const.f32 	%f921, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f2093, %f921, %f453, %f2091;
	fma.rn.ftz.f32 	%f2094, %f921, %f457, %f2092;
	ld.const.f32 	%f920, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f2095, %f920, %f451, %f2093;
	fma.rn.ftz.f32 	%f2096, %f920, %f455, %f2094;
	// Both lumas are saturated to [0,1]; their difference %f2099 is added to
	// every channel of (%f455,%f456,%f457).
	cvt.ftz.sat.f32.f32 	%f2097, %f2095;
	cvt.ftz.sat.f32.f32 	%f2098, %f2096;
	sub.ftz.f32 	%f2099, %f2097, %f2098;
	add.ftz.f32 	%f2100, %f2099, %f455;
	mov.f32 	%f2101, %f2100;
	add.ftz.f32 	%f2102, %f2099, %f456;
	mov.f32 	%f2103, %f2102;
	add.ftz.f32 	%f2104, %f2099, %f457;
	mov.f32 	%f2105, %f2104;
	// %f2109 = L = saturated luma of the shifted colour.
	.loc	22	50	0
	mul.ftz.f32 	%f2106, %f2102, %f922;
	fma.rn.ftz.f32 	%f2107, %f921, %f2104, %f2106;
	fma.rn.ftz.f32 	%f2108, %f920, %f2100, %f2107;
	cvt.ftz.sat.f32.f32 	%f2109, %f2108;
	// %f2111 = min of the three shifted channels.
	.loc	22	116	0
	setp.gt.ftz.f32 	%p140, %f2102, %f2100;
	selp.f32 	%f2110, %f2100, %f2102, %p140;
	setp.lt.ftz.f32 	%p141, %f2110, %f2104;
	selp.f32 	%f2111, %f2110, %f2104, %p141;
	mov.f32 	%f2112, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p142, %f2111, %f2112;
	@!%p142 bra 	$Lt_125_312834;
	// Lower clip (min < 0): c = L + L * (c - L) / (L - min) for each channel.
	.loc	22	119	0
	sub.ftz.f32 	%f2113, %f2109, %f2111;
	sub.ftz.f32 	%f2114, %f2104, %f2109;
	mul.ftz.f32 	%f2115, %f2109, %f2114;
	div.approx.ftz.f32 	%f2116, %f2115, %f2113;
	add.ftz.f32 	%f2105, %f2109, %f2116;
	.loc	22	120	0
	sub.ftz.f32 	%f2117, %f2102, %f2109;
	mul.ftz.f32 	%f2118, %f2109, %f2117;
	div.approx.ftz.f32 	%f2119, %f2118, %f2113;
	add.ftz.f32 	%f2103, %f2109, %f2119;
	.loc	22	121	0
	sub.ftz.f32 	%f2120, %f2100, %f2109;
	mul.ftz.f32 	%f2121, %f2109, %f2120;
	div.approx.ftz.f32 	%f2122, %f2121, %f2113;
	add.ftz.f32 	%f2101, %f2109, %f2122;
$Lt_125_312834:
	// %f2124 = max of the three shifted channels. The max is taken over the
	// values from before the lower clip (%f2100/%f2102/%f2104), not over the
	// possibly updated %f2101/%f2103/%f2105.
	max.ftz.f32 	%f2123, %f2102, %f2100;
	max.ftz.f32 	%f2124, %f2123, %f2104;
	mov.f32 	%f2125, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p143, %f2124, %f2125;
	@!%p143 bra 	$Lt_125_313346;
	// Upper clip (max > 1): c = L + (1 - L) * (c - L) / (max - L).
	.loc	27	529	0
	mov.f32 	%f2126, 0f3f800000;  	// 1
	sub.ftz.f32 	%f2127, %f2126, %f2109;
	sub.ftz.f32 	%f2128, %f2124, %f2109;
	sub.ftz.f32 	%f2129, %f2105, %f2109;
	mul.ftz.f32 	%f2130, %f2127, %f2129;
	div.approx.ftz.f32 	%f2131, %f2130, %f2128;
	.loc	22	125	0
	add.ftz.f32 	%f2105, %f2131, %f2109;
	.loc	27	529	0
	sub.ftz.f32 	%f2132, %f2103, %f2109;
	mul.ftz.f32 	%f2133, %f2127, %f2132;
	div.approx.ftz.f32 	%f2134, %f2133, %f2128;
	.loc	22	126	0
	add.ftz.f32 	%f2103, %f2134, %f2109;
	.loc	27	529	0
	sub.ftz.f32 	%f2135, %f2101, %f2109;
	mul.ftz.f32 	%f2136, %f2127, %f2135;
	div.approx.ftz.f32 	%f2137, %f2136, %f2128;
	.loc	22	127	0
	add.ftz.f32 	%f2101, %f2137, %f2109;
$Lt_125_313346:
	// Composite the clipped colour (%f2101, %f2103, %f2105):
	//   a   = alphaGain * %f454
	//   out = a + %f458 - a * %f458
	.loc	22	468	0
	ld.param.f32 	%f2138, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f586, %f2138, %f454;
	add.ftz.f32 	%f587, %f586, %f458;
	mul.ftz.f32 	%f2139, %f586, %f458;
	sub.ftz.f32 	%f589, %f587, %f2139;
	mov.f32 	%f2140, %f589;
	// out - 8e-6 <= 0: emit an all-zero pixel and skip the reciprocal.
	mov.f32 	%f2141, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f2142, %f589, %f2141;
	mov.f32 	%f2143, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p144, %f2142, %f2143;
	@!%p144 bra 	$Lt_125_314114;
	mov.f32 	%f2144, 0f00000000;  	// 0
	mov.f32 	%f2145, 0f00000000;  	// 0
	mov.f32 	%f2146, 0f00000000;  	// 0
	mov.f32 	%f2140, 0f00000000;  	// 0
	bra.uni 	$Lt_125_313858;
$Lt_125_314114:
	// %f600 = a/out, %f603 = 1 - a/out; per channel:
	//   result = %f45x * (1 - a/out) + (a/out) * (B * %f458 + B * (1 - %f458))
	mov.f32 	%f2147, 0f3f800000;  	// 1
	sub.ftz.f32 	%f598, %f2147, %f458;
	rcp.approx.ftz.f32 	%f599, %f589;
	mul.ftz.f32 	%f600, %f599, %f586;
	mov.f32 	%f2148, 0f3f800000;  	// 1
	mul.ftz.f32 	%f2149, %f586, %f599;
	sub.ftz.f32 	%f603, %f2148, %f2149;
	mul.ftz.f32 	%f2150, %f598, %f2101;
	fma.rn.ftz.f32 	%f2151, %f2101, %f458, %f2150;
	mul.ftz.f32 	%f2152, %f600, %f2151;
	fma.rn.ftz.f32 	%f2146, %f455, %f603, %f2152;
	mul.ftz.f32 	%f2153, %f598, %f2103;
	fma.rn.ftz.f32 	%f2154, %f2103, %f458, %f2153;
	mul.ftz.f32 	%f2155, %f600, %f2154;
	fma.rn.ftz.f32 	%f2145, %f456, %f603, %f2155;
	mul.ftz.f32 	%f2156, %f598, %f2105;
	fma.rn.ftz.f32 	%f2157, %f2105, %f458, %f2156;
	mul.ftz.f32 	%f2158, %f600, %f2157;
	fma.rn.ftz.f32 	%f2144, %f457, %f603, %f2158;
$Lt_125_313858:
	// Write the composited pixel back into %f451..%f454.
	.loc	6	216	0
	mov.f32 	%f451, %f2146;
	mov.f32 	%f452, %f2145;
	mov.f32 	%f453, %f2144;
	mov.f32 	%f454, %f2140;
	bra.uni 	$Lt_125_314370;
$Lt_125_269570:
	// Path taken only when %p5 is set (%p5 is computed before this view).
	// It scales alpha by alphaGain and applies a sign-preserving power of
	// 0.454545 (~1/2.2) to each colour channel after multiplying by the
	// reciprocal of the alpha in %f454.
	.loc	6	218	0
	@!%p5 bra 	$Lt_125_314370;
	ld.param.f32 	%f2159, [__cudaparm_cuda_motion_renderquad_alphaGain];
	mul.ftz.f32 	%f2160, %f2159, %f454;
	// alphaGain * alpha - 8e-6 <= 0: zero the colour channels; alpha is still
	// set to the gained value at $Lt_125_314882.
	mov.f32 	%f2161, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f2162, %f2160, %f2161;
	mov.f32 	%f2163, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p145, %f2162, %f2163;
	@!%p145 bra 	$Lt_125_315138;
	mov.f32 	%f453, 0f00000000;   	// 0
	mov.f32 	%f452, 0f00000000;   	// 0
	mov.f32 	%f451, 0f00000000;   	// 0
	bra.uni 	$Lt_125_314882;
$Lt_125_315138:
	// %f2164 ~= 1 / %f454. The reciprocal uses the alpha from before the gain
	// was applied (%f454), not the gained value %f2160.
	.loc	6	238	0
	rcp.approx.ftz.f32 	%f2164, %f454;
	mul.ftz.f32 	%f2165, %f2164, %f451;
	mov.f32 	%f2166, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p146, %f2165, %f2166;
	@!%p146 bra 	$Lt_125_315394;
	// Negative input: -exp2(log2(-x) * 0.454545).
	.loc	5	242	0
	neg.ftz.f32 	%f2167, %f2165;
	lg2.approx.ftz.f32 	%f2168, %f2167;
	mov.f32 	%f2169, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2170, %f2168, %f2169;
	ex2.approx.ftz.f32 	%f2171, %f2170;
	neg.ftz.f32 	%f2172, %f2171;
	bra.uni 	$LDWendi___log2f_302_34;
$Lt_125_315394:
	// Non-negative input: exp2(log2(x) * 0.454545).
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f2173, %f2165;
	mov.f32 	%f2174, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2175, %f2173, %f2174;
	ex2.approx.ftz.f32 	%f2172, %f2175;
$LDWendi___log2f_302_34:
	.loc	6	238	0
	mov.f32 	%f451, %f2172;
	// Same transform for the second channel (%f452).
	.loc	6	239	0
	mul.ftz.f32 	%f2176, %f2164, %f452;
	mov.f32 	%f2177, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p147, %f2176, %f2177;
	@!%p147 bra 	$Lt_125_315906;
	.loc	5	242	0
	neg.ftz.f32 	%f2178, %f2176;
	lg2.approx.ftz.f32 	%f2179, %f2178;
	mov.f32 	%f2180, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2181, %f2179, %f2180;
	ex2.approx.ftz.f32 	%f2182, %f2181;
	neg.ftz.f32 	%f2183, %f2182;
	bra.uni 	$LDWendi___log2f_302_32;
$Lt_125_315906:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f2184, %f2176;
	mov.f32 	%f2185, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2186, %f2184, %f2185;
	ex2.approx.ftz.f32 	%f2183, %f2186;
$LDWendi___log2f_302_32:
	.loc	6	239	0
	mov.f32 	%f452, %f2183;
	// Same transform for the third channel (%f453).
	.loc	6	240	0
	mul.ftz.f32 	%f2187, %f2164, %f453;
	mov.f32 	%f2188, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p148, %f2187, %f2188;
	@!%p148 bra 	$Lt_125_316418;
	.loc	5	242	0
	neg.ftz.f32 	%f2189, %f2187;
	lg2.approx.ftz.f32 	%f2190, %f2189;
	mov.f32 	%f2191, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2192, %f2190, %f2191;
	ex2.approx.ftz.f32 	%f2193, %f2192;
	neg.ftz.f32 	%f2194, %f2193;
	bra.uni 	$LDWendi___log2f_302_30;
$Lt_125_316418:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f2195, %f2187;
	mov.f32 	%f2196, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f2197, %f2195, %f2196;
	ex2.approx.ftz.f32 	%f2194, %f2197;
$LDWendi___log2f_302_30:
	.loc	6	240	0
	mov.f32 	%f453, %f2194;
$Lt_125_314882:
	// Output alpha = alphaGain * incoming alpha (both branches join here).
	.loc	6	243	0
	mov.f32 	%f454, %f2160;
$Lt_125_314370:
$Lt_125_269314:
	// Final pixel store for cuda_motion_renderquad: writes (%f451..%f454) to
	// the address %rd8 + %rd7 * pixelSize. %p6, %rd7 and %rd8 are computed
	// before this view -- presumably a half-float format flag, the pixel index
	// and the frame base pointer; TODO confirm.
	@!%p6 bra 	$Lt_125_317186;
	// %p6 set: 8 bytes per pixel. Each channel is rounded to f16 and the four
	// values are stored as one v4.u16.
	.loc	21	126	0
	mul.lo.u64 	%rd13, %rd7, 8;
	add.u64 	%rd14, %rd8, %rd13;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f451;
	mov.b32		%r116, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f452;
	mov.b32		%r117, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f453;
	mov.b32		%r118, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f454;
	mov.b32		%r119, %b1; }
	st.global.v4.u16 	[%rd14+0], {%r116,%r117,%r118,%r119};
	.loc	6	246	0
	bra.uni 	$LBB381_cuda_motion_renderquad;
$Lt_125_317186:
	// %p6 clear: 16 bytes per pixel, stored as one v4.f32.
	.loc	21	126	0
	mul.lo.u64 	%rd15, %rd7, 16;
	add.u64 	%rd16, %rd8, %rd15;
	st.global.v4.f32 	[%rd16+0], {%f451,%f452,%f453,%f454};
$LBB381_cuda_motion_renderquad:
	.loc	6	537	0
	exit;
$LDWend_cuda_motion_renderquad:
	} // cuda_motion_renderquad

	// Kernel entry: cuda_motion_blitquad.
	// Each thread handles one pixel (x, y) derived from its block/thread indices.
	// Parameters, as used in the visible part of the body:
	//   srcFrame / dstFrame  - global pointers the source/destination pixels are
	//                          loaded from and stored to
	//   inDeviceFormat       - 0 selects 4 x f16 pixels (8 bytes); any other
	//                          value selects 4 x f32 pixels (16 bytes)
	//   width / height       - threads with x >= width or y >= height bail out
	//   tWidth / tHeight     - used when mirroring the source coordinates
	//   srcPitch / dstPitch  - row pitch, in pixels, of source / destination
	//   blendMode            - selects the blend branch (see the dispatch chain)
	//   inDoCompositeOver    - non-zero: blend with the existing destination
	//   alphaGain            - multiplier applied to the source alpha
	//   __val_paramquad      - 120-byte struct; floats at offsets 48, 52, 64, 68,
	//                          80 and 84 are read in the visible code
	.entry cuda_motion_blitquad (
		.param .u64 __cudaparm_cuda_motion_blitquad_srcFrame,
		.param .u64 __cudaparm_cuda_motion_blitquad_dstFrame,
		.param .u32 __cudaparm_cuda_motion_blitquad_inDeviceFormat,
		.param .s32 __cudaparm_cuda_motion_blitquad_width,
		.param .s32 __cudaparm_cuda_motion_blitquad_height,
		.param .s32 __cudaparm_cuda_motion_blitquad_tWidth,
		.param .s32 __cudaparm_cuda_motion_blitquad_tHeight,
		.param .s32 __cudaparm_cuda_motion_blitquad_srcPitch,
		.param .s32 __cudaparm_cuda_motion_blitquad_dstPitch,
		.param .u32 __cudaparm_cuda_motion_blitquad_blendMode,
		.param .s8 __cudaparm_cuda_motion_blitquad_inDoCompositeOver,
		.param .f32 __cudaparm_cuda_motion_blitquad_alphaGain,
		.param .align 8 .b8 __cudaparm_cuda_motion_blitquad___val_paramquad[120])
	{
	.reg .u32 %r<141>;
	.reg .u64 %rd<24>;
	.reg .f32 %f<1737>;
	.reg .pred %p<150>;
	.loc	6	552	0
$LDWbegin_cuda_motion_blitquad:
	// %r8  = x = ctaid.x * ntid.x + tid.x
	// %r10 = y = ctaid.y * ntid.y + tid.y
	.loc	6	555	0
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Frame bounds check: (height <= y) | (width <= x). `set` yields an
	// all-ones mask and `neg` turns it into 1, so the OR is non-zero when the
	// pixel lies outside the frame.
	ld.param.s32 	%r11, [__cudaparm_cuda_motion_blitquad_width];
	ld.param.s32 	%r12, [__cudaparm_cuda_motion_blitquad_height];
	set.le.u32.s32 	%r13, %r12, %r10;
	neg.s32 	%r14, %r13;
	set.le.u32.s32 	%r15, %r11, %r8;
	neg.s32 	%r16, %r15;
	or.b32 	%r17, %r14, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	@%p1 bra 	$Lt_126_271362;
	// Outside the frame: jump to $LBB358_cuda_motion_blitquad (defined past
	// this view; presumably the kernel exit).
	bra.uni 	$LBB358_cuda_motion_blitquad;
$Lt_126_271362:
	// Test the pixel against the rectangle stored in __val_paramquad:
	//   inside  <=>  q[48] <= x < q[64]  and  q[52] <= y < q[68]
	// (offsets in bytes, values are f32). Inside pixels continue at
	// $L_126_270338; everything else falls into the "outside" path below.
	.loc	6	561	0
	cvt.rn.f32.s32 	%f1, %r8;
	ld.param.f32 	%f2, [__cudaparm_cuda_motion_blitquad___val_paramquad+48];
	setp.lt.ftz.f32 	%p2, %f1, %f2;
	@%p2 bra 	$Lt_126_319234;
	cvt.rn.f32.s32 	%f3, %r10;
	ld.param.f32 	%f4, [__cudaparm_cuda_motion_blitquad___val_paramquad+52];
	setp.lt.ftz.f32 	%p3, %f3, %f4;
	@%p3 bra 	$Lt_126_319234;
	ld.param.f32 	%f5, [__cudaparm_cuda_motion_blitquad___val_paramquad+64];
	setp.le.ftz.f32 	%p4, %f5, %f1;
	@%p4 bra 	$Lt_126_319234;
	ld.param.f32 	%f6, [__cudaparm_cuda_motion_blitquad___val_paramquad+68];
	setp.le.ftz.f32 	%p5, %f6, %f3;
	@!%p5 bra 	$L_126_270338;
$Lt_126_319234:
$L_126_270594:
	// Outside the rectangle. When inDoCompositeOver is non-zero the
	// destination pixel is left untouched; otherwise it is overwritten with
	// zeros at pixel index dstPitch * y + x.
	ld.param.s8 	%r19, [__cudaparm_cuda_motion_blitquad_inDoCompositeOver];
	mov.u32 	%r20, 0;
	setp.ne.s32 	%p6, %r19, %r20;
	@%p6 bra 	$LBB358_cuda_motion_blitquad;
	ld.param.s32 	%r21, [__cudaparm_cuda_motion_blitquad_dstPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_cuda_motion_blitquad_dstFrame];
	ld.param.s32 	%r24, [__cudaparm_cuda_motion_blitquad_inDeviceFormat];
	mov.u32 	%r25, 0;
	setp.ne.s32 	%p7, %r24, %r25;
	@%p7 bra 	$Lt_126_272642;
	// inDeviceFormat == 0: 8-byte pixel, four f16 zeros.
	.loc	21	126	0
	mov.f32 	%f7, 0f00000000;     	// 0
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f7;
	mov.b32		%r26, %b1; }
	mov.s32 	%r27, %r26;
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	st.global.v4.u16 	[%rd4+0], {%r27,%r27,%r27,%r27};
	.loc	6	568	0
	bra.uni 	$LBB358_cuda_motion_blitquad;
$Lt_126_272642:
	// inDeviceFormat != 0: 16-byte pixel, four f32 zeros.
	.loc	21	126	0
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	mov.f32 	%f8, 0f00000000;     	// 0
	mov.f32 	%f9, 0f00000000;     	// 0
	mov.f32 	%f10, 0f00000000;    	// 0
	mov.f32 	%f11, 0f00000000;    	// 0
	st.global.v4.f32 	[%rd6+0], {%f8,%f9,%f10,%f11};
	bra.uni 	$LBB358_cuda_motion_blitquad;
$L_126_270338:
	// Inside the rectangle: map the destination pixel to a source pixel.
	//   dx = float(trunc(x - q[48])),  dy = float(trunc(y - q[52]))
	//   u  = trunc(dx * q[80] + dy * q[84])
	//   v  = trunc(dy * q[80] - dx * q[84])
	// NOTE(review): q[80] / q[84] behave like the cosine / sine of a rotation;
	// confirm against the struct definition.
	.loc	6	574	0
	sub.ftz.f32 	%f12, %f1, %f2;
	sub.ftz.f32 	%f13, %f3, %f4;
	cvt.rzi.ftz.s32.f32 	%r28, %f12;
	cvt.rzi.ftz.s32.f32 	%r29, %f13;
	cvt.rn.f32.s32 	%f14, %r28;
	cvt.rn.f32.s32 	%f15, %r29;
	ld.param.f32 	%f16, [__cudaparm_cuda_motion_blitquad___val_paramquad+84];
	mul.ftz.f32 	%f17, %f15, %f16;
	ld.param.f32 	%f18, [__cudaparm_cuda_motion_blitquad___val_paramquad+80];
	fma.rn.ftz.f32 	%f19, %f14, %f18, %f17;
	cvt.rzi.ftz.s32.f32 	%r30, %f19;
	// Source x (%r34): u + tWidth - 1 when q[80] + q[84] < 0, else u.
	ld.param.s32 	%r31, [__cudaparm_cuda_motion_blitquad_tWidth];
	add.s32 	%r32, %r31, %r30;
	sub.s32 	%r33, %r32, 1;
	add.ftz.f32 	%f20, %f18, %f16;
	mov.f32 	%f21, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p8, %f20, %f21;
	selp.s32 	%r34, %r33, %r30, %p8;
	// Source y (%r39): v + tHeight - 1 when q[80] - q[84] < 0, else v.
	.loc	6	581	0
	mul.ftz.f32 	%f22, %f14, %f16;
	mul.ftz.f32 	%f23, %f15, %f18;
	sub.ftz.f32 	%f24, %f23, %f22;
	cvt.rzi.ftz.s32.f32 	%r35, %f24;
	ld.param.s32 	%r36, [__cudaparm_cuda_motion_blitquad_tHeight];
	add.s32 	%r37, %r36, %r35;
	sub.s32 	%r38, %r37, 1;
	sub.ftz.f32 	%f25, %f18, %f16;
	mov.f32 	%f26, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p9, %f25, %f26;
	selp.s32 	%r39, %r38, %r35, %p9;
	// %p10 = (inDeviceFormat == 0); it is reused later for the destination
	// load. %rd7 = source pixel index = srcPitch * srcY + srcX.
	ld.param.s32 	%r40, [__cudaparm_cuda_motion_blitquad_inDeviceFormat];
	mov.s32 	%r41, 0;
	setp.eq.s32 	%p10, %r40, %r41;
	ld.param.s32 	%r42, [__cudaparm_cuda_motion_blitquad_srcPitch];
	mul.lo.s32 	%r43, %r42, %r39;
	add.s32 	%r44, %r34, %r43;
	cvt.s64.s32 	%rd7, %r44;
	ld.param.u64 	%rd8, [__cudaparm_cuda_motion_blitquad_srcFrame];
	@!%p10 bra 	$Lt_126_273154;
	// Format 0: load four f16 values (8 bytes) and widen each to f32.
	.loc	21	115	0
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	ld.global.v4.u16 	{%r45,%r46,%r47,%r48}, [%rd10+0];
	.loc	6	583	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r45;
	cvt.ftz.f32.f16	%f27, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r46;
	cvt.ftz.f32.f16	%f28, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r47;
	cvt.ftz.f32.f16	%f29, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r48;
	cvt.ftz.f32.f16	%f30, %b1; }
	bra.uni 	$Lt_126_272898;
$Lt_126_273154:
	// Other formats: load four f32 values (16 bytes) directly.
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	ld.global.v4.f32 	{%f27,%f28,%f29,%f30}, [%rd12+0];
$Lt_126_272898:
	// Working copy of the source pixel: (%f31, %f32, %f33, %f34), with %f34
	// used as alpha by the blend branches.
	.loc	6	584	0
	mov.f32 	%f31, %f27;
	mov.f32 	%f32, %f28;
	mov.f32 	%f33, %f29;
	mov.f32 	%f34, %f30;
	// %r49 = blendMode, %p11 = (blendMode == 18).
	ld.param.u32 	%r49, [__cudaparm_cuda_motion_blitquad_blendMode];
	mov.s32 	%r50, 18;
	setp.eq.s32 	%p11, %r49, %r50;
	// %rd13 = destination pixel index = dstPitch * y + x.
	ld.param.s32 	%r51, [__cudaparm_cuda_motion_blitquad_dstPitch];
	mul.lo.s32 	%r52, %r51, %r10;
	add.s32 	%r53, %r8, %r52;
	cvt.s64.s32 	%rd13, %r53;
	ld.param.u64 	%rd14, [__cudaparm_cuda_motion_blitquad_dstFrame];
	// inDoCompositeOver == 0: skip the destination read and blending and jump
	// to $Lt_126_273666 (defined past this view).
	ld.param.s8 	%r54, [__cudaparm_cuda_motion_blitquad_inDoCompositeOver];
	mov.u32 	%r55, 0;
	setp.eq.s32 	%p12, %r54, %r55;
	@%p12 bra 	$Lt_126_273666;
	@!%p10 bra 	$Lt_126_274178;
	// Format 0: load the destination pixel as four f16 values and widen them
	// into (%f35, %f36, %f37, %f38).
	.loc	21	115	0
	mul.lo.u64 	%rd15, %rd13, 8;
	add.u64 	%rd16, %rd14, %rd15;
	ld.global.v4.u16 	{%r56,%r57,%r58,%r59}, [%rd16+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r56;
	cvt.ftz.f32.f16	%f35, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r57;
	cvt.ftz.f32.f16	%f36, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r58;
	cvt.ftz.f32.f16	%f37, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r59;
	cvt.ftz.f32.f16	%f38, %b1; }
	bra.uni 	$Lt_126_273922;
$Lt_126_274178:
	// Other formats: load the destination pixel as four f32 values.
	mul.lo.u64 	%rd17, %rd13, 16;
	add.u64 	%rd18, %rd14, %rd17;
	ld.global.v4.f32 	{%f35,%f36,%f37,%f38}, [%rd18+0];
$Lt_126_273922:
	// Blend mode 18 (%p11): composite source over destination after raising
	// the colour channels to the power 2.2, then raise the result to the power
	// 0.454545 (~1/2.2). Every power is sign-preserving:
	//   x < 0 : -exp2(log2(-x) * k),  otherwise exp2(log2(x) * k).
	// All other modes go to the dispatch chain at $Lt_126_274690.
	@!%p11 bra 	$Lt_126_274690;
	// %f45 = pow2.2(source channel 0, %f31)
	.loc	5	255	0
	mov.f32 	%f39, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p13, %f31, %f39;
	@!%p13 bra 	$Lt_126_274946;
	.loc	5	234	0
	neg.ftz.f32 	%f40, %f31;
	lg2.approx.ftz.f32 	%f41, %f40;
	mov.f32 	%f42, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f43, %f41, %f42;
	ex2.approx.ftz.f32 	%f44, %f43;
	neg.ftz.f32 	%f45, %f44;
	bra.uni 	$LDWendi___log2f_303_80;
$Lt_126_274946:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f46, %f31;
	mov.f32 	%f47, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f48, %f46, %f47;
	ex2.approx.ftz.f32 	%f45, %f48;
$LDWendi___log2f_303_80:
	// %f55 = pow2.2(source channel 1, %f32)
	.loc	5	256	0
	mov.f32 	%f49, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p14, %f32, %f49;
	@!%p14 bra 	$Lt_126_275458;
	.loc	5	234	0
	neg.ftz.f32 	%f50, %f32;
	lg2.approx.ftz.f32 	%f51, %f50;
	mov.f32 	%f52, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f53, %f51, %f52;
	ex2.approx.ftz.f32 	%f54, %f53;
	neg.ftz.f32 	%f55, %f54;
	bra.uni 	$LDWendi___log2f_303_78;
$Lt_126_275458:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f56, %f32;
	mov.f32 	%f57, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f58, %f56, %f57;
	ex2.approx.ftz.f32 	%f55, %f58;
$LDWendi___log2f_303_78:
	// %f65 = pow2.2(source channel 2, %f33)
	.loc	5	257	0
	mov.f32 	%f59, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p15, %f33, %f59;
	@!%p15 bra 	$Lt_126_275970;
	.loc	5	234	0
	neg.ftz.f32 	%f60, %f33;
	lg2.approx.ftz.f32 	%f61, %f60;
	mov.f32 	%f62, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f63, %f61, %f62;
	ex2.approx.ftz.f32 	%f64, %f63;
	neg.ftz.f32 	%f65, %f64;
	bra.uni 	$LDWendi___log2f_303_76;
$Lt_126_275970:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f66, %f33;
	mov.f32 	%f67, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f68, %f66, %f67;
	ex2.approx.ftz.f32 	%f65, %f68;
$LDWendi___log2f_303_76:
	// Alpha terms:
	//   sa   (%f73) = sat(alphaGain * sat(srcAlpha))
	//   dw   (%f76) = sat(dstAlpha) * (1 - sa)
	//   outA (%f77) = sa + dw
	.loc	22	83	0
	cvt.ftz.sat.f32.f32 	%f69, %f34;
	cvt.ftz.sat.f32.f32 	%f70, %f38;
	ld.param.f32 	%f71, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f72, %f71, %f69;
	cvt.ftz.sat.f32.f32 	%f73, %f72;
	mov.f32 	%f74, 0f3f800000;    	// 1
	sub.ftz.f32 	%f75, %f74, %f73;
	mul.ftz.f32 	%f76, %f70, %f75;
	add.ftz.f32 	%f77, %f73, %f76;
	// outA - 8e-6 <= 0: the result is an all-zero pixel.
	mov.f32 	%f78, 0fb70637bd;    	// -8e-006
	add.ftz.f32 	%f79, %f77, %f78;
	mov.f32 	%f80, 0f00000000;    	// 0
	setp.le.ftz.f32 	%p16, %f79, %f80;
	@!%p16 bra 	$Lt_126_276738;
	mov.f32 	%f81, 0f00000000;    	// 0
	mov.f32 	%f82, 0f00000000;    	// 0
	mov.f32 	%f83, 0f00000000;    	// 0
	mov.f32 	%f84, 0f00000000;    	// 0
	bra.uni 	$Lt_126_276482;
$Lt_126_276738:
	// %f91 = pow2.2(destination channel 0, %f35)
	mov.f32 	%f85, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p17, %f35, %f85;
	@!%p17 bra 	$Lt_126_276994;
	.loc	5	234	0
	neg.ftz.f32 	%f86, %f35;
	lg2.approx.ftz.f32 	%f87, %f86;
	mov.f32 	%f88, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f89, %f87, %f88;
	ex2.approx.ftz.f32 	%f90, %f89;
	neg.ftz.f32 	%f91, %f90;
	bra.uni 	$LDWendi___log2f_303_74;
$Lt_126_276994:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f92, %f35;
	mov.f32 	%f93, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f94, %f92, %f93;
	ex2.approx.ftz.f32 	%f91, %f94;
$LDWendi___log2f_303_74:
	// %f101 = pow2.2(destination channel 1, %f36)
	.loc	22	97	0
	mov.f32 	%f95, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p18, %f36, %f95;
	@!%p18 bra 	$Lt_126_277506;
	.loc	5	234	0
	neg.ftz.f32 	%f96, %f36;
	lg2.approx.ftz.f32 	%f97, %f96;
	mov.f32 	%f98, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f99, %f97, %f98;
	ex2.approx.ftz.f32 	%f100, %f99;
	neg.ftz.f32 	%f101, %f100;
	bra.uni 	$LDWendi___log2f_303_72;
$Lt_126_277506:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f102, %f36;
	mov.f32 	%f103, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f104, %f102, %f103;
	ex2.approx.ftz.f32 	%f101, %f104;
$LDWendi___log2f_303_72:
	// %f111 = pow2.2(destination channel 2, %f37)
	.loc	22	98	0
	mov.f32 	%f105, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p19, %f37, %f105;
	@!%p19 bra 	$Lt_126_278018;
	.loc	5	234	0
	neg.ftz.f32 	%f106, %f37;
	lg2.approx.ftz.f32 	%f107, %f106;
	mov.f32 	%f108, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f109, %f107, %f108;
	ex2.approx.ftz.f32 	%f110, %f109;
	neg.ftz.f32 	%f111, %f110;
	bra.uni 	$LDWendi___log2f_303_70;
$Lt_126_278018:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f112, %f37;
	mov.f32 	%f113, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f114, %f112, %f113;
	ex2.approx.ftz.f32 	%f111, %f114;
$LDWendi___log2f_303_70:
	// Output alpha = sat(outA); a second epsilon test guards the reciprocal.
	.loc	5	208	0
	cvt.ftz.sat.f32.f32 	%f115, %f77;
	mov.f32 	%f116, %f115;
	mov.f32 	%f117, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f118, %f115, %f117;
	mov.f32 	%f119, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p20, %f118, %f119;
	@%p20 bra 	$Lt_126_278786;
	// Per channel: (dw * dstPow + srcPow * sa) / outAlpha.
	.loc	5	213	0
	rcp.approx.ftz.f32 	%f120, %f115;
	mul.ftz.f32 	%f121, %f65, %f73;
	fma.rn.ftz.f32 	%f122, %f76, %f111, %f121;
	mul.ftz.f32 	%f123, %f120, %f122;
	.loc	5	214	0
	mul.ftz.f32 	%f124, %f55, %f73;
	fma.rn.ftz.f32 	%f125, %f76, %f101, %f124;
	mul.ftz.f32 	%f126, %f120, %f125;
	.loc	5	215	0
	mul.ftz.f32 	%f127, %f45, %f73;
	fma.rn.ftz.f32 	%f128, %f76, %f91, %f127;
	mul.ftz.f32 	%f129, %f120, %f128;
	bra.uni 	$Lt_126_278530;
$Lt_126_278786:
	.loc	5	219	0
	mov.f32 	%f123, 0f00000000;   	// 0
	mov.f32 	%f126, 0f00000000;   	// 0
	mov.f32 	%f129, 0f00000000;   	// 0
	mov.f32 	%f116, 0f00000000;   	// 0
$Lt_126_278530:
	// Apply the 0.454545 power to each composited channel.
	// %f136 = pow(channel 0, %f129)
	.loc	5	266	0
	mov.f32 	%f130, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p21, %f129, %f130;
	@!%p21 bra 	$Lt_126_279042;
	.loc	5	242	0
	neg.ftz.f32 	%f131, %f129;
	lg2.approx.ftz.f32 	%f132, %f131;
	mov.f32 	%f133, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f134, %f132, %f133;
	ex2.approx.ftz.f32 	%f135, %f134;
	neg.ftz.f32 	%f136, %f135;
	bra.uni 	$LDWendi___log2f_303_68;
$Lt_126_279042:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f137, %f129;
	mov.f32 	%f138, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f139, %f137, %f138;
	ex2.approx.ftz.f32 	%f136, %f139;
$LDWendi___log2f_303_68:
	// %f146 = pow(channel 1, %f126)
	.loc	5	267	0
	mov.f32 	%f140, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p22, %f126, %f140;
	@!%p22 bra 	$Lt_126_279554;
	.loc	5	242	0
	neg.ftz.f32 	%f141, %f126;
	lg2.approx.ftz.f32 	%f142, %f141;
	mov.f32 	%f143, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f144, %f142, %f143;
	ex2.approx.ftz.f32 	%f145, %f144;
	neg.ftz.f32 	%f146, %f145;
	bra.uni 	$LDWendi___log2f_303_66;
$Lt_126_279554:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f147, %f126;
	mov.f32 	%f148, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f149, %f147, %f148;
	ex2.approx.ftz.f32 	%f146, %f149;
$LDWendi___log2f_303_66:
	// %f156 = pow(channel 2, %f123)
	.loc	5	268	0
	mov.f32 	%f150, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p23, %f123, %f150;
	@!%p23 bra 	$Lt_126_280066;
	.loc	5	242	0
	neg.ftz.f32 	%f151, %f123;
	lg2.approx.ftz.f32 	%f152, %f151;
	mov.f32 	%f153, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f154, %f152, %f153;
	ex2.approx.ftz.f32 	%f155, %f154;
	neg.ftz.f32 	%f156, %f155;
	bra.uni 	$LDWendi___log2f_303_64;
$Lt_126_280066:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f157, %f123;
	mov.f32 	%f158, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f159, %f157, %f158;
	ex2.approx.ftz.f32 	%f156, %f159;
$LDWendi___log2f_303_64:
	.loc	22	101	0
	mov.f32 	%f84, %f136;
	mov.f32 	%f83, %f146;
	mov.f32 	%f82, %f156;
	mov.f32 	%f81, %f116;
$Lt_126_276482:
	// Result replaces the working pixel (%f31..%f34); continue at
	// $Lt_126_317954 (defined past this view).
	.loc	6	170	0
	mov.f32 	%f31, %f84;
	mov.f32 	%f32, %f83;
	mov.f32 	%f33, %f82;
	mov.f32 	%f34, %f81;
	bra.uni 	$Lt_126_317954;
$Lt_126_274690:
	// Blend-mode dispatch: a linear chain of equality tests on blendMode
	// (%r49), each branching to that mode's handler. Mode 18 was handled
	// before this chain. Tested values, in order:
	//   3, 11, 17, 22, 6, 1, 13, 4, 2, 14, 12, 19, 23, 8, 24, 15, 20, 9, 5, 7,
	//   25, 26, 10, 21, 0, 16
	// Any other value falls through to $Lt_126_317954 with the working pixel
	// (%f31..%f34) unchanged by this chain.
	.loc	6	189	0
	mov.u32 	%r60, 3;
	setp.eq.s32 	%p24, %r49, %r60;
	@%p24 bra 	$Lt_126_258;
	mov.u32 	%r61, 11;
	setp.eq.s32 	%p25, %r49, %r61;
	@%p25 bra 	$Lt_126_770;
	mov.u32 	%r62, 17;
	setp.eq.s32 	%p26, %r49, %r62;
	@%p26 bra 	$Lt_126_1026;
	mov.u32 	%r63, 22;
	setp.eq.s32 	%p27, %r49, %r63;
	@%p27 bra 	$Lt_126_1282;
	mov.u32 	%r64, 6;
	setp.eq.s32 	%p28, %r49, %r64;
	@%p28 bra 	$Lt_126_1538;
	mov.u32 	%r65, 1;
	setp.eq.s32 	%p29, %r49, %r65;
	@%p29 bra 	$Lt_126_1794;
	mov.u32 	%r66, 13;
	setp.eq.s32 	%p30, %r49, %r66;
	@%p30 bra 	$Lt_126_2050;
	mov.u32 	%r67, 4;
	setp.eq.s32 	%p31, %r49, %r67;
	@%p31 bra 	$Lt_126_2306;
	mov.u32 	%r68, 2;
	setp.eq.s32 	%p32, %r49, %r68;
	@%p32 bra 	$Lt_126_2562;
	mov.u32 	%r69, 14;
	setp.eq.s32 	%p33, %r49, %r69;
	@%p33 bra 	$Lt_126_2818;
	mov.u32 	%r70, 12;
	setp.eq.s32 	%p34, %r49, %r70;
	@%p34 bra 	$Lt_126_3074;
	mov.u32 	%r71, 19;
	setp.eq.s32 	%p35, %r49, %r71;
	@%p35 bra 	$Lt_126_3330;
	mov.u32 	%r72, 23;
	setp.eq.s32 	%p36, %r49, %r72;
	@%p36 bra 	$Lt_126_3586;
	mov.u32 	%r73, 8;
	setp.eq.s32 	%p37, %r49, %r73;
	@%p37 bra 	$Lt_126_3842;
	mov.u32 	%r74, 24;
	setp.eq.s32 	%p38, %r49, %r74;
	@%p38 bra 	$Lt_126_4098;
	mov.u32 	%r75, 15;
	setp.eq.s32 	%p39, %r49, %r75;
	@%p39 bra 	$Lt_126_4354;
	mov.u32 	%r76, 20;
	setp.eq.s32 	%p40, %r49, %r76;
	@%p40 bra 	$Lt_126_4610;
	mov.u32 	%r77, 9;
	setp.eq.s32 	%p41, %r49, %r77;
	@%p41 bra 	$Lt_126_4866;
	mov.u32 	%r78, 5;
	setp.eq.s32 	%p42, %r49, %r78;
	@%p42 bra 	$Lt_126_5122;
	mov.u32 	%r79, 7;
	setp.eq.s32 	%p43, %r49, %r79;
	@%p43 bra 	$Lt_126_5378;
	mov.u32 	%r80, 25;
	setp.eq.s32 	%p44, %r49, %r80;
	@%p44 bra 	$Lt_126_5634;
	mov.u32 	%r81, 26;
	setp.eq.s32 	%p45, %r49, %r81;
	@%p45 bra 	$Lt_126_5890;
	mov.u32 	%r82, 10;
	setp.eq.s32 	%p46, %r49, %r82;
	@%p46 bra 	$Lt_126_6146;
	mov.u32 	%r83, 21;
	setp.eq.s32 	%p47, %r49, %r83;
	@%p47 bra 	$Lt_126_6402;
	mov.u32 	%r84, 0;
	setp.eq.s32 	%p48, %r49, %r84;
	@%p48 bra 	$Lt_126_6658;
	mov.u32 	%r85, 16;
	setp.eq.s32 	%p49, %r49, %r85;
	@%p49 bra 	$Lt_126_6914;
	bra.uni 	$Lt_126_317954;
$Lt_126_258:
	// Blend mode 3: per-channel minimum of source and destination, then
	// composite. (src = %f31..%f34, dst = %f35..%f38.)
	//   a   = alphaGain * srcA
	//   out = a + dstA - a * dstA
	.loc	22	469	0
	ld.param.f32 	%f160, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f160, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f163, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f163;
	mov.f32 	%f165, %f164;
	// out - 8e-6 <= 0: the result is an all-zero pixel.
	mov.f32 	%f166, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f167, %f164, %f166;
	mov.f32 	%f168, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f167, %f168;
	@!%p50 bra 	$Lt_126_280834;
	mov.f32 	%f169, 0f00000000;   	// 0
	mov.f32 	%f170, 0f00000000;   	// 0
	mov.f32 	%f171, 0f00000000;   	// 0
	mov.f32 	%f165, 0f00000000;   	// 0
	bra.uni 	$Lt_126_280578;
$Lt_126_280834:
	// %f173 = 1 - dstA, %f175 = a/out, %f178 = 1 - a/out. Per channel:
	//   result = dst * (1 - a/out)
	//          + (a/out) * (src * (1 - dstA) + dstA * min(src, dst))
	mov.f32 	%f172, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f172, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f176, 0f3f800000;   	// 1
	mul.ftz.f32 	%f177, %f161, %f174;
	sub.ftz.f32 	%f178, %f176, %f177;
	min.ftz.f32 	%f179, %f31, %f35;
	mul.ftz.f32 	%f180, %f38, %f179;
	fma.rn.ftz.f32 	%f181, %f31, %f173, %f180;
	mul.ftz.f32 	%f182, %f175, %f181;
	fma.rn.ftz.f32 	%f171, %f35, %f178, %f182;
	min.ftz.f32 	%f183, %f32, %f36;
	mul.ftz.f32 	%f184, %f38, %f183;
	fma.rn.ftz.f32 	%f185, %f32, %f173, %f184;
	mul.ftz.f32 	%f186, %f175, %f185;
	fma.rn.ftz.f32 	%f170, %f36, %f178, %f186;
	min.ftz.f32 	%f187, %f33, %f37;
	mul.ftz.f32 	%f188, %f38, %f187;
	fma.rn.ftz.f32 	%f189, %f33, %f173, %f188;
	mul.ftz.f32 	%f190, %f175, %f189;
	fma.rn.ftz.f32 	%f169, %f37, %f178, %f190;
$Lt_126_280578:
	// Result replaces the working pixel; alpha becomes `out` (or 0).
	.loc	6	191	0
	mov.f32 	%f31, %f171;
	mov.f32 	%f32, %f170;
	mov.f32 	%f33, %f169;
	mov.f32 	%f34, %f165;
	bra.uni 	$Lt_126_317954;
$Lt_126_770:
	// Blend mode 11: per-channel maximum of source and destination, then
	// composite. (src = %f31..%f34, dst = %f35..%f38.)
	//   a   = alphaGain * srcA
	//   out = a + dstA - a * dstA
	.loc	22	470	0
	ld.param.f32 	%f191, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f191, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f192, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f192;
	mov.f32 	%f193, %f164;
	// out - 8e-6 <= 0: the result is an all-zero pixel.
	mov.f32 	%f194, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f195, %f164, %f194;
	mov.f32 	%f196, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p51, %f195, %f196;
	@!%p51 bra 	$Lt_126_281346;
	mov.f32 	%f197, 0f00000000;   	// 0
	mov.f32 	%f198, 0f00000000;   	// 0
	mov.f32 	%f199, 0f00000000;   	// 0
	mov.f32 	%f193, 0f00000000;   	// 0
	bra.uni 	$Lt_126_281090;
$Lt_126_281346:
	// %f173 = 1 - dstA, %f175 = a/out, %f178 = 1 - a/out. Per channel:
	//   result = dst * (1 - a/out)
	//          + (a/out) * (src * (1 - dstA) + dstA * max(src, dst))
	mov.f32 	%f200, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f200, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f201, 0f3f800000;   	// 1
	mul.ftz.f32 	%f202, %f161, %f174;
	sub.ftz.f32 	%f178, %f201, %f202;
	max.ftz.f32 	%f203, %f31, %f35;
	mul.ftz.f32 	%f204, %f38, %f203;
	fma.rn.ftz.f32 	%f205, %f31, %f173, %f204;
	mul.ftz.f32 	%f206, %f175, %f205;
	fma.rn.ftz.f32 	%f199, %f35, %f178, %f206;
	max.ftz.f32 	%f207, %f32, %f36;
	mul.ftz.f32 	%f208, %f38, %f207;
	fma.rn.ftz.f32 	%f209, %f32, %f173, %f208;
	mul.ftz.f32 	%f210, %f175, %f209;
	fma.rn.ftz.f32 	%f198, %f36, %f178, %f210;
	max.ftz.f32 	%f211, %f33, %f37;
	mul.ftz.f32 	%f212, %f38, %f211;
	fma.rn.ftz.f32 	%f213, %f33, %f173, %f212;
	mul.ftz.f32 	%f214, %f175, %f213;
	fma.rn.ftz.f32 	%f197, %f37, %f178, %f214;
$Lt_126_281090:
	// Result replaces the working pixel; alpha becomes `out` (or 0).
	.loc	6	192	0
	mov.f32 	%f31, %f199;
	mov.f32 	%f32, %f198;
	mov.f32 	%f33, %f197;
	mov.f32 	%f34, %f193;
	bra.uni 	$Lt_126_317954;
$Lt_126_1026:
	// Blend mode 17: per-channel product src * dst clamped to [0, 1], then
	// composite. (src = %f31..%f34, dst = %f35..%f38.)
	//   a   = alphaGain * srcA
	//   out = a + dstA - a * dstA
	.loc	22	471	0
	ld.param.f32 	%f215, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f215, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f216, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f216;
	mov.f32 	%f217, %f164;
	// out - 8e-6 <= 0: the result is an all-zero pixel.
	mov.f32 	%f218, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f219, %f164, %f218;
	mov.f32 	%f220, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p52, %f219, %f220;
	@!%p52 bra 	$Lt_126_281858;
	mov.f32 	%f221, 0f00000000;   	// 0
	mov.f32 	%f222, 0f00000000;   	// 0
	mov.f32 	%f223, 0f00000000;   	// 0
	mov.f32 	%f217, 0f00000000;   	// 0
	bra.uni 	$Lt_126_281602;
$Lt_126_281858:
	// %f173 = 1 - dstA, %f175 = a/out, %f178 = 1 - a/out. Per channel:
	//   B      = min(max(src * dst, 0), 1)
	//   result = dst * (1 - a/out) + (a/out) * (src * (1 - dstA) + dstA * B)
	mov.f32 	%f224, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f224, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f225, 0f3f800000;   	// 1
	mul.ftz.f32 	%f226, %f161, %f174;
	sub.ftz.f32 	%f178, %f225, %f226;
	mul.ftz.f32 	%f227, %f31, %f35;
	mov.f32 	%f228, 0f00000000;   	// 0
	max.ftz.f32 	%f229, %f227, %f228;
	mov.f32 	%f230, 0f3f800000;   	// 1
	min.ftz.f32 	%f231, %f229, %f230;
	mul.ftz.f32 	%f232, %f38, %f231;
	fma.rn.ftz.f32 	%f233, %f31, %f173, %f232;
	mul.ftz.f32 	%f234, %f175, %f233;
	fma.rn.ftz.f32 	%f223, %f35, %f178, %f234;
	mul.ftz.f32 	%f235, %f32, %f36;
	mov.f32 	%f236, 0f00000000;   	// 0
	max.ftz.f32 	%f237, %f235, %f236;
	mov.f32 	%f238, 0f3f800000;   	// 1
	min.ftz.f32 	%f239, %f237, %f238;
	mul.ftz.f32 	%f240, %f38, %f239;
	fma.rn.ftz.f32 	%f241, %f32, %f173, %f240;
	mul.ftz.f32 	%f242, %f175, %f241;
	fma.rn.ftz.f32 	%f222, %f36, %f178, %f242;
	mul.ftz.f32 	%f243, %f33, %f37;
	mov.f32 	%f244, 0f00000000;   	// 0
	max.ftz.f32 	%f245, %f243, %f244;
	mov.f32 	%f246, 0f3f800000;   	// 1
	min.ftz.f32 	%f247, %f245, %f246;
	mul.ftz.f32 	%f248, %f38, %f247;
	fma.rn.ftz.f32 	%f249, %f33, %f173, %f248;
	mul.ftz.f32 	%f250, %f175, %f249;
	fma.rn.ftz.f32 	%f221, %f37, %f178, %f250;
$Lt_126_281602:
	// Result replaces the working pixel; alpha becomes `out` (or 0).
	.loc	6	193	0
	mov.f32 	%f31, %f223;
	mov.f32 	%f32, %f222;
	mov.f32 	%f33, %f221;
	mov.f32 	%f34, %f217;
	bra.uni 	$Lt_126_317954;
$Lt_126_1282:
	// Blend mode 22: screen-style blend, then composite.
	// (src = %f31..%f34, dst = %f35..%f38.) Per channel:
	//   s = min(max(src, 1e-7), 1),  d = min(max(dst, 1e-7), 1)
	//   B = min(max(1 - (1 - d) * (1 - s), 0), 1)
	// Alpha:
	//   a   = alphaGain * srcA
	//   out = a + dstA - a * dstA
	.loc	22	472	0
	ld.param.f32 	%f251, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f251, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f252, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f252;
	mov.f32 	%f253, %f164;
	// out - 8e-6 <= 0: the result is an all-zero pixel.
	mov.f32 	%f254, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f255, %f164, %f254;
	mov.f32 	%f256, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p53, %f255, %f256;
	@!%p53 bra 	$Lt_126_282370;
	mov.f32 	%f257, 0f00000000;   	// 0
	mov.f32 	%f258, 0f00000000;   	// 0
	mov.f32 	%f259, 0f00000000;   	// 0
	mov.f32 	%f253, 0f00000000;   	// 0
	bra.uni 	$Lt_126_282114;
$Lt_126_282370:
	// %f173 = 1 - dstA, %f175 = a/out, %f178 = 1 - a/out.
	//   result = dst * (1 - a/out) + (a/out) * (src * (1 - dstA) + dstA * B)
	mov.f32 	%f260, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f260, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f261, 0f3f800000;   	// 1
	mul.ftz.f32 	%f262, %f161, %f174;
	sub.ftz.f32 	%f178, %f261, %f262;
	// Channel 0 (%f31 / %f35) -> %f259
	mov.f32 	%f263, 0f3f800000;   	// 1
	mov.f32 	%f264, 0f3f800000;   	// 1
	mov.f32 	%f265, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f266, %f35, %f265;
	mov.f32 	%f267, 0f3f800000;   	// 1
	min.ftz.f32 	%f268, %f266, %f267;
	sub.ftz.f32 	%f269, %f264, %f268;
	mov.f32 	%f270, 0f3f800000;   	// 1
	mov.f32 	%f271, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f272, %f31, %f271;
	mov.f32 	%f273, 0f3f800000;   	// 1
	min.ftz.f32 	%f274, %f272, %f273;
	sub.ftz.f32 	%f275, %f270, %f274;
	mul.ftz.f32 	%f276, %f269, %f275;
	sub.ftz.f32 	%f277, %f263, %f276;
	mov.f32 	%f278, 0f00000000;   	// 0
	max.ftz.f32 	%f279, %f277, %f278;
	mov.f32 	%f280, 0f3f800000;   	// 1
	min.ftz.f32 	%f281, %f279, %f280;
	mul.ftz.f32 	%f282, %f38, %f281;
	fma.rn.ftz.f32 	%f283, %f31, %f173, %f282;
	mul.ftz.f32 	%f284, %f175, %f283;
	fma.rn.ftz.f32 	%f259, %f35, %f178, %f284;
	// Channel 1 (%f32 / %f36) -> %f258
	mov.f32 	%f285, 0f3f800000;   	// 1
	mov.f32 	%f286, 0f3f800000;   	// 1
	mov.f32 	%f287, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f288, %f36, %f287;
	mov.f32 	%f289, 0f3f800000;   	// 1
	min.ftz.f32 	%f290, %f288, %f289;
	sub.ftz.f32 	%f291, %f286, %f290;
	mov.f32 	%f292, 0f3f800000;   	// 1
	mov.f32 	%f293, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f294, %f32, %f293;
	mov.f32 	%f295, 0f3f800000;   	// 1
	min.ftz.f32 	%f296, %f294, %f295;
	sub.ftz.f32 	%f297, %f292, %f296;
	mul.ftz.f32 	%f298, %f291, %f297;
	sub.ftz.f32 	%f299, %f285, %f298;
	mov.f32 	%f300, 0f00000000;   	// 0
	max.ftz.f32 	%f301, %f299, %f300;
	mov.f32 	%f302, 0f3f800000;   	// 1
	min.ftz.f32 	%f303, %f301, %f302;
	mul.ftz.f32 	%f304, %f38, %f303;
	fma.rn.ftz.f32 	%f305, %f32, %f173, %f304;
	mul.ftz.f32 	%f306, %f175, %f305;
	fma.rn.ftz.f32 	%f258, %f36, %f178, %f306;
	// Channel 2 (%f33 / %f37) -> %f257
	mov.f32 	%f307, 0f3f800000;   	// 1
	mov.f32 	%f308, 0f3f800000;   	// 1
	mov.f32 	%f309, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f310, %f37, %f309;
	mov.f32 	%f311, 0f3f800000;   	// 1
	min.ftz.f32 	%f312, %f310, %f311;
	sub.ftz.f32 	%f313, %f308, %f312;
	mov.f32 	%f314, 0f3f800000;   	// 1
	mov.f32 	%f315, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f316, %f33, %f315;
	mov.f32 	%f317, 0f3f800000;   	// 1
	min.ftz.f32 	%f318, %f316, %f317;
	sub.ftz.f32 	%f319, %f314, %f318;
	mul.ftz.f32 	%f320, %f313, %f319;
	sub.ftz.f32 	%f321, %f307, %f320;
	mov.f32 	%f322, 0f00000000;   	// 0
	max.ftz.f32 	%f323, %f321, %f322;
	mov.f32 	%f324, 0f3f800000;   	// 1
	min.ftz.f32 	%f325, %f323, %f324;
	mul.ftz.f32 	%f326, %f38, %f325;
	fma.rn.ftz.f32 	%f327, %f33, %f173, %f326;
	mul.ftz.f32 	%f328, %f175, %f327;
	fma.rn.ftz.f32 	%f257, %f37, %f178, %f328;
$Lt_126_282114:
	// Result replaces the working pixel; alpha becomes `out` (or 0).
	.loc	6	194	0
	mov.f32 	%f31, %f259;
	mov.f32 	%f32, %f258;
	mov.f32 	%f33, %f257;
	mov.f32 	%f34, %f253;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_1538: pick one of the two input pixels, (%f31,%f32,%f33,%f34)
// or (%f35,%f36,%f37,%f38), based on the gained alpha and a pseudo-random value.
//   %f161 = alphaGain * %f34
//   %f161 - 8e-6 <= 0 : result = (%f35,%f36,%f37,%f38)
//   %f161 + 8e-6 >= 1 : result = (%f31,%f32,%f33,%f34)  (alpha is the un-gained %f34)
//   otherwise         : compare trunc(%f161 * 32767) with a value in 0..32767
//                       derived from %r8 and %r10; smaller -> (%f35..%f38),
//                       else (%f31..%f34).
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): this looks like a "dissolve"-style blend mode; the mode name is
// not visible here - confirm against the source at .loc 22 526-537.
// ---------------------------------------------------------------------------
$Lt_126_1538:
	.loc	22	526	0
	ld.param.f32 	%f329, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f329, %f34;
	mov.f32 	%f330, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f331, %f161, %f330;
	mov.f32 	%f332, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p54, %f331, %f332;
	@!%p54 bra 	$Lt_126_237570;
	.loc	22	528	0
	// Gained alpha is (nearly) zero: keep the %f35..%f38 pixel unchanged.
	mov.f32 	%f333, %f35;
	mov.f32 	%f334, %f36;
	mov.f32 	%f335, %f37;
	mov.f32 	%f336, %f38;
	bra.uni 	$LDWendi__Z4Randj_303_56;
$Lt_126_237570:
	.loc	22	530	0
	mov.f32 	%f337, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f338, %f161, %f337;
	mov.f32 	%f339, 0f3f800000;   	// 1
	setp.ge.ftz.f32 	%p55, %f338, %f339;
	@!%p55 bra 	$Lt_126_237826;
	.loc	22	532	0
	// Gained alpha is (nearly) one or more: take the %f31..%f34 pixel unchanged.
	mov.f32 	%f333, %f31;
	mov.f32 	%f334, %f32;
	mov.f32 	%f335, %f33;
	mov.f32 	%f336, %f34;
	bra.uni 	$LDWendi__Z4Randj_303_56;
$Lt_126_237826:
	.loc	21	143	0
	// Integer mixing of %r8, %r10 and the constant 1 through rounds of
	// subtract / xor / shift (shift amounts 13, 8, 13, 12, 16, 5, 3, 10, 15).
	// NOTE(review): the round structure resembles Bob Jenkins' three-word mix,
	// presumably the inlined Mix3/Rand2D helpers declared at the top of the
	// file - confirm. What %r8 and %r10 hold is not visible here.
	mov.s32 	%r86, 1;
	sub.s32 	%r87, %r86, %r8;
	shr.u32 	%r88, %r10, 13;
	sub.u32 	%r89, %r8, %r10;
	sub.u32 	%r90, %r87, %r10;
	xor.b32 	%r91, %r88, %r90;
	shl.b32 	%r92, %r91, 8;
	sub.u32 	%r93, %r89, %r91;
	sub.u32 	%r94, %r10, %r91;
	xor.b32 	%r95, %r92, %r93;
	shr.u32 	%r96, %r95, 13;
	sub.u32 	%r97, %r94, %r95;
	sub.u32 	%r98, %r91, %r95;
	xor.b32 	%r99, %r96, %r97;
	shr.u32 	%r100, %r99, 12;
	sub.u32 	%r101, %r98, %r99;
	xor.b32 	%r102, %r100, %r101;
	sub.u32 	%r103, %r95, %r99;
	sub.u32 	%r104, %r103, %r102;
	shl.b32 	%r105, %r102, 16;
	xor.b32 	%r106, %r104, %r105;
	.loc	21	144	0
	sub.u32 	%r107, %r99, %r102;
	sub.u32 	%r108, %r107, %r106;
	shr.u32 	%r109, %r106, 5;
	xor.b32 	%r110, %r108, %r109;
	.loc	21	145	0
	sub.u32 	%r111, %r102, %r106;
	sub.u32 	%r112, %r111, %r110;
	shr.u32 	%r113, %r110, 3;
	xor.b32 	%r114, %r112, %r113;
	.loc	21	146	0
	sub.u32 	%r115, %r106, %r110;
	sub.u32 	%r116, %r115, %r114;
	shl.b32 	%r117, %r114, 10;
	xor.b32 	%r118, %r116, %r117;
	.loc	21	147	0
	sub.u32 	%r119, %r110, %r114;
	sub.u32 	%r120, %r119, %r118;
	shr.u32 	%r121, %r118, 15;
	xor.b32 	%r122, %r120, %r121;
	.loc	22	537	0
	// %r123 = trunc(%f161 * 32767): the alpha threshold as an integer.
	mov.f32 	%f340, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f341, %f161, %f340;
	cvt.rzi.ftz.s32.f32 	%r123, %f341;
	// Two multiply-add transforms of the mixed seed %r122; bits 16..23 of each
	// are combined as (first << 7) ^ second, giving a value in 0..32767.
	// NOTE(review): the second constant pair looks like the first generator
	// stepped twice and folded into one multiply-add - confirm.
	mul.lo.u32 	%r124, %r122, 1103515245;
	add.u32 	%r125, %r124, 12345;
	shr.u32 	%r126, %r125, 16;
	and.b32 	%r127, %r126, 255;
	shl.b32 	%r128, %r127, 7;
	mul.lo.u32 	%r129, %r122, -1029531031;
	sub.u32 	%r130, %r129, 740551042;
	shr.u32 	%r131, %r130, 16;
	and.b32 	%r132, %r131, 255;
	xor.b32 	%r133, %r128, %r132;
	// threshold < random value -> take (%f35..%f38); otherwise (%f31..%f34).
	setp.lt.s32 	%p56, %r123, %r133;
	@%p56 bra 	$Lt_126_282882;
	mov.f32 	%f342, %f31;
	mov.f32 	%f343, %f32;
	mov.f32 	%f344, %f33;
	mov.f32 	%f345, %f34;
	bra.uni 	$Lt_126_282626;
$Lt_126_282882:
	mov.f32 	%f342, %f35;
	mov.f32 	%f343, %f36;
	mov.f32 	%f344, %f37;
	mov.f32 	%f345, %f38;
$Lt_126_282626:
	mov.f32 	%f333, %f342;
	mov.f32 	%f334, %f343;
	mov.f32 	%f335, %f344;
	mov.f32 	%f336, %f345;
$LDWendi__Z4Randj_303_56:
	.loc	6	195	0
	// Join point of all three paths: commit the chosen pixel to %f31..%f34.
	mov.f32 	%f31, %f333;
	mov.f32 	%f32, %f334;
	mov.f32 	%f33, %f335;
	mov.f32 	%f34, %f336;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_1794: alpha-composite pixel (%f31,%f32,%f33,%f34) with pixel
// (%f35,%f36,%f37,%f38) using a per-channel blend term.
//   A = %f161 = alphaGain * %f34          B = %f38
//   U = %f164 = A + B - A*B               (output alpha)
//   U - 8e-6 <= 0 : result = (0,0,0,0)
//   otherwise, for each channel pair (p,q) in (%f31,%f35), (%f32,%f36), (%f33,%f37):
//     blend = clamp(1 - (1 - clamp(q,0,1)) / clamp(p,1e-7,1), 0, 1)
//     out   = q*(1 - A/U) + (A/U) * (p*(1 - B) + B*blend)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): the blend term matches the usual "color burn" formula if
// %f35..%f37 is the backdrop - confirm against the source at .loc 22 473.
// ---------------------------------------------------------------------------
$Lt_126_1794:
	.loc	22	473	0
	ld.param.f32 	%f346, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f346, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f347, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f347;
	mov.f32 	%f348, %f164;
	mov.f32 	%f349, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f350, %f164, %f349;
	mov.f32 	%f351, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p57, %f350, %f351;
	@!%p57 bra 	$Lt_126_283394;
	// Combined alpha is (nearly) zero: emit an all-zero pixel and skip the
	// reciprocal of %f164 below.
	mov.f32 	%f352, 0f00000000;   	// 0
	mov.f32 	%f353, 0f00000000;   	// 0
	mov.f32 	%f354, 0f00000000;   	// 0
	mov.f32 	%f348, 0f00000000;   	// 0
	bra.uni 	$Lt_126_283138;
$Lt_126_283394:
	// Weights shared by all three channels:
	//   %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f355, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f355, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f356, 0f3f800000;   	// 1
	mul.ftz.f32 	%f357, %f161, %f174;
	sub.ftz.f32 	%f178, %f356, %f357;
	// Channel pair %f31 / %f35 -> %f354
	mov.f32 	%f358, 0f3f800000;   	// 1
	mov.f32 	%f359, 0f3f800000;   	// 1
	mov.f32 	%f360, 0f00000000;   	// 0
	max.ftz.f32 	%f361, %f35, %f360;
	mov.f32 	%f362, 0f3f800000;   	// 1
	min.ftz.f32 	%f363, %f361, %f362;
	sub.ftz.f32 	%f364, %f359, %f363;
	// The divisor is clamped from below to 1e-7 so it can never be zero.
	mov.f32 	%f365, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f366, %f31, %f365;
	mov.f32 	%f367, 0f3f800000;   	// 1
	min.ftz.f32 	%f368, %f366, %f367;
	div.approx.ftz.f32 	%f369, %f364, %f368;
	sub.ftz.f32 	%f370, %f358, %f369;
	mov.f32 	%f371, 0f00000000;   	// 0
	max.ftz.f32 	%f372, %f370, %f371;
	mov.f32 	%f373, 0f3f800000;   	// 1
	min.ftz.f32 	%f374, %f372, %f373;
	mul.ftz.f32 	%f375, %f38, %f374;
	fma.rn.ftz.f32 	%f376, %f31, %f173, %f375;
	mul.ftz.f32 	%f377, %f175, %f376;
	fma.rn.ftz.f32 	%f354, %f35, %f178, %f377;
	// Channel pair %f32 / %f36 -> %f353
	mov.f32 	%f378, 0f3f800000;   	// 1
	mov.f32 	%f379, 0f3f800000;   	// 1
	mov.f32 	%f380, 0f00000000;   	// 0
	max.ftz.f32 	%f381, %f36, %f380;
	mov.f32 	%f382, 0f3f800000;   	// 1
	min.ftz.f32 	%f383, %f381, %f382;
	sub.ftz.f32 	%f384, %f379, %f383;
	mov.f32 	%f385, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f386, %f32, %f385;
	mov.f32 	%f387, 0f3f800000;   	// 1
	min.ftz.f32 	%f388, %f386, %f387;
	div.approx.ftz.f32 	%f389, %f384, %f388;
	sub.ftz.f32 	%f390, %f378, %f389;
	mov.f32 	%f391, 0f00000000;   	// 0
	max.ftz.f32 	%f392, %f390, %f391;
	mov.f32 	%f393, 0f3f800000;   	// 1
	min.ftz.f32 	%f394, %f392, %f393;
	mul.ftz.f32 	%f395, %f38, %f394;
	fma.rn.ftz.f32 	%f396, %f32, %f173, %f395;
	mul.ftz.f32 	%f397, %f175, %f396;
	fma.rn.ftz.f32 	%f353, %f36, %f178, %f397;
	// Channel pair %f33 / %f37 -> %f352
	mov.f32 	%f398, 0f3f800000;   	// 1
	mov.f32 	%f399, 0f3f800000;   	// 1
	mov.f32 	%f400, 0f00000000;   	// 0
	max.ftz.f32 	%f401, %f37, %f400;
	mov.f32 	%f402, 0f3f800000;   	// 1
	min.ftz.f32 	%f403, %f401, %f402;
	sub.ftz.f32 	%f404, %f399, %f403;
	mov.f32 	%f405, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f406, %f33, %f405;
	mov.f32 	%f407, 0f3f800000;   	// 1
	min.ftz.f32 	%f408, %f406, %f407;
	div.approx.ftz.f32 	%f409, %f404, %f408;
	sub.ftz.f32 	%f410, %f398, %f409;
	mov.f32 	%f411, 0f00000000;   	// 0
	max.ftz.f32 	%f412, %f410, %f411;
	mov.f32 	%f413, 0f3f800000;   	// 1
	min.ftz.f32 	%f414, %f412, %f413;
	mul.ftz.f32 	%f415, %f38, %f414;
	fma.rn.ftz.f32 	%f416, %f33, %f173, %f415;
	mul.ftz.f32 	%f417, %f175, %f416;
	fma.rn.ftz.f32 	%f352, %f37, %f178, %f417;
$Lt_126_283138:
	.loc	6	196	0
	// Commit the composited pixel; %f348 is U, or 0 on the zero-alpha path.
	mov.f32 	%f31, %f354;
	mov.f32 	%f32, %f353;
	mov.f32 	%f33, %f352;
	mov.f32 	%f34, %f348;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_2050: alpha-composite pixel (%f31,%f32,%f33,%f34) with pixel
// (%f35,%f36,%f37,%f38) using a per-channel blend term.
//   A = %f161 = alphaGain * %f34          B = %f38
//   U = %f164 = A + B - A*B               (output alpha)
//   U - 8e-6 <= 0 : result = (0,0,0,0)
//   otherwise, for each channel pair (p,q) in (%f31,%f35), (%f32,%f36), (%f33,%f37):
//     blend = clamp(clamp(p,0,1) + clamp(q,0,1) - 1, 0, 1)
//     out   = q*(1 - A/U) + (A/U) * (p*(1 - B) + B*blend)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): the blend term matches the usual "linear burn" formula -
// confirm against the source at .loc 22 474.
// ---------------------------------------------------------------------------
$Lt_126_2050:
	.loc	22	474	0
	ld.param.f32 	%f418, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f418, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f419, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f419;
	mov.f32 	%f420, %f164;
	mov.f32 	%f421, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f422, %f164, %f421;
	mov.f32 	%f423, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p58, %f422, %f423;
	@!%p58 bra 	$Lt_126_283906;
	// Combined alpha is (nearly) zero: emit an all-zero pixel and skip the
	// reciprocal of %f164 below.
	mov.f32 	%f424, 0f00000000;   	// 0
	mov.f32 	%f425, 0f00000000;   	// 0
	mov.f32 	%f426, 0f00000000;   	// 0
	mov.f32 	%f420, 0f00000000;   	// 0
	bra.uni 	$Lt_126_283650;
$Lt_126_283906:
	// Weights shared by all three channels:
	//   %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f427, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f427, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f428, 0f3f800000;   	// 1
	mul.ftz.f32 	%f429, %f161, %f174;
	sub.ftz.f32 	%f178, %f428, %f429;
	// Channel pair %f31 / %f35 -> %f426
	mov.f32 	%f430, 0f00000000;   	// 0
	max.ftz.f32 	%f431, %f31, %f430;
	mov.f32 	%f432, 0f3f800000;   	// 1
	min.ftz.f32 	%f433, %f431, %f432;
	mov.f32 	%f434, 0f00000000;   	// 0
	max.ftz.f32 	%f435, %f35, %f434;
	mov.f32 	%f436, 0f3f800000;   	// 1
	min.ftz.f32 	%f437, %f435, %f436;
	add.ftz.f32 	%f438, %f433, %f437;
	mov.f32 	%f439, 0fbf800000;   	// -1
	add.ftz.f32 	%f440, %f438, %f439;
	mov.f32 	%f441, 0f00000000;   	// 0
	max.ftz.f32 	%f442, %f440, %f441;
	mov.f32 	%f443, 0f3f800000;   	// 1
	min.ftz.f32 	%f444, %f442, %f443;
	mul.ftz.f32 	%f445, %f38, %f444;
	fma.rn.ftz.f32 	%f446, %f31, %f173, %f445;
	mul.ftz.f32 	%f447, %f175, %f446;
	fma.rn.ftz.f32 	%f426, %f35, %f178, %f447;
	// Channel pair %f32 / %f36 -> %f425
	mov.f32 	%f448, 0f00000000;   	// 0
	max.ftz.f32 	%f449, %f32, %f448;
	mov.f32 	%f450, 0f3f800000;   	// 1
	min.ftz.f32 	%f451, %f449, %f450;
	mov.f32 	%f452, 0f00000000;   	// 0
	max.ftz.f32 	%f453, %f36, %f452;
	mov.f32 	%f454, 0f3f800000;   	// 1
	min.ftz.f32 	%f455, %f453, %f454;
	add.ftz.f32 	%f456, %f451, %f455;
	mov.f32 	%f457, 0fbf800000;   	// -1
	add.ftz.f32 	%f458, %f456, %f457;
	mov.f32 	%f459, 0f00000000;   	// 0
	max.ftz.f32 	%f460, %f458, %f459;
	mov.f32 	%f461, 0f3f800000;   	// 1
	min.ftz.f32 	%f462, %f460, %f461;
	mul.ftz.f32 	%f463, %f38, %f462;
	fma.rn.ftz.f32 	%f464, %f32, %f173, %f463;
	mul.ftz.f32 	%f465, %f175, %f464;
	fma.rn.ftz.f32 	%f425, %f36, %f178, %f465;
	// Channel pair %f33 / %f37 -> %f424
	mov.f32 	%f466, 0f00000000;   	// 0
	max.ftz.f32 	%f467, %f33, %f466;
	mov.f32 	%f468, 0f3f800000;   	// 1
	min.ftz.f32 	%f469, %f467, %f468;
	mov.f32 	%f470, 0f00000000;   	// 0
	max.ftz.f32 	%f471, %f37, %f470;
	mov.f32 	%f472, 0f3f800000;   	// 1
	min.ftz.f32 	%f473, %f471, %f472;
	add.ftz.f32 	%f474, %f469, %f473;
	mov.f32 	%f475, 0fbf800000;   	// -1
	add.ftz.f32 	%f476, %f474, %f475;
	mov.f32 	%f477, 0f00000000;   	// 0
	max.ftz.f32 	%f478, %f476, %f477;
	mov.f32 	%f479, 0f3f800000;   	// 1
	min.ftz.f32 	%f480, %f478, %f479;
	mul.ftz.f32 	%f481, %f38, %f480;
	fma.rn.ftz.f32 	%f482, %f33, %f173, %f481;
	mul.ftz.f32 	%f483, %f175, %f482;
	fma.rn.ftz.f32 	%f424, %f37, %f178, %f483;
$Lt_126_283650:
	.loc	6	197	0
	// Commit the composited pixel; %f420 is U, or 0 on the zero-alpha path.
	mov.f32 	%f31, %f426;
	mov.f32 	%f32, %f425;
	mov.f32 	%f33, %f424;
	mov.f32 	%f34, %f420;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_2306: whole-pixel selection driven by a luma comparison.
//   A = %f161 = alphaGain * %f34          B = %f38
//   A - 8e-6 <= 0 : result = (%f35,%f36,%f37,%f38)
//   B - 8e-6 <= 0 : result = (%f31,%f32,%f33, A)
//   otherwise     : luma(x,y,z) = sat(k[+0]*z + k[+4]*y + k[+8]*x), where k is
//                   kRGB32f_To_601YPbPr when %r11 <= 720 (signed compare) and
//                   kRGB32f_To_709YPbPr when %r11 > 720.
//       luma(%f31,%f32,%f33) < luma(%f35,%f36,%f37):
//           composite with the blend term equal to the %f31-side channel
//           itself: out = q*(1 - A/U) + (A/U) * (p*(1 - B) + p*B),
//           with U = A + B - A*B (all zeros when U - 8e-6 <= 0)
//       else: result = (%f35,%f36,%f37,%f38)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): behaves like a "darker color" mode; presumably %r11 is an image
// dimension used to choose between the 601 and 709 tables - confirm both.
// ---------------------------------------------------------------------------
$Lt_126_2306:
	.loc	6	198	0
	ld.param.f32 	%f484, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f484, %f34;
	mov.f32 	%f485, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f486, %f161, %f485;
	mov.f32 	%f487, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p59, %f486, %f487;
	@!%p59 bra 	$Lt_126_284418;
	.loc	22	608	0
	// Gained alpha is (nearly) zero: keep the %f35..%f38 pixel unchanged.
	mov.f32 	%f488, %f35;
	mov.f32 	%f489, %f36;
	mov.f32 	%f490, %f37;
	mov.f32 	%f491, %f38;
	bra.uni 	$Lt_126_285186;
$Lt_126_284418:
	mov.f32 	%f492, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f493, %f38, %f492;
	mov.f32 	%f494, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p60, %f493, %f494;
	@!%p60 bra 	$Lt_126_284930;
	// %f38 is (nearly) zero: take the %f31..%f33 color with the gained alpha.
	mov.f32 	%f488, %f31;
	mov.f32 	%f489, %f32;
	mov.f32 	%f490, %f33;
	mov.f32 	%f491, %f161;
	bra.uni 	$Lt_126_285186;
$Lt_126_284930:
	// Choose the coefficient table: %r11 > 720 -> 709 path, else 601 path.
	mov.u32 	%r134, 720;
	setp.gt.s32 	%p61, %r11, %r134;
	@%p61 bra 	$Lt_126_285442;
	.loc	22	555	0
	// 601 path: %f501 = luma of (%f31,%f32,%f33), %f505 = luma of
	// (%f35,%f36,%f37); cvt.sat clamps each to [0,1].
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f498, %f32, %f497;
	fma.rn.ftz.f32 	%f499, %f496, %f33, %f498;
	fma.rn.ftz.f32 	%f500, %f495, %f31, %f499;
	cvt.ftz.sat.f32.f32 	%f501, %f500;
	mul.ftz.f32 	%f502, %f36, %f497;
	fma.rn.ftz.f32 	%f503, %f496, %f37, %f502;
	fma.rn.ftz.f32 	%f504, %f495, %f35, %f503;
	cvt.ftz.sat.f32.f32 	%f505, %f504;
	setp.lt.ftz.f32 	%p62, %f501, %f505;
	@!%p62 bra 	$Lt_126_239618;
	.loc	22	468	0
	// The %f31 side has the lower luma: composite it over the %f35 side.
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f506, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f506;
	mov.f32 	%f507, %f164;
	mov.f32 	%f508, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f509, %f164, %f508;
	mov.f32 	%f510, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p63, %f509, %f510;
	@!%p63 bra 	$Lt_126_285954;
	// Combined alpha is (nearly) zero: emit an all-zero pixel.
	mov.f32 	%f511, 0f00000000;   	// 0
	mov.f32 	%f512, 0f00000000;   	// 0
	mov.f32 	%f513, 0f00000000;   	// 0
	mov.f32 	%f507, 0f00000000;   	// 0
	bra.uni 	$Lt_126_285698;
$Lt_126_285954:
	// %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f514, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f514, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f515, 0f3f800000;   	// 1
	mul.ftz.f32 	%f516, %f161, %f174;
	sub.ftz.f32 	%f178, %f515, %f516;
	// Per channel: q*(1 - A/U) + (A/U) * (p*B + (1 - B)*p)
	mul.ftz.f32 	%f517, %f173, %f31;
	fma.rn.ftz.f32 	%f518, %f31, %f38, %f517;
	mul.ftz.f32 	%f519, %f175, %f518;
	fma.rn.ftz.f32 	%f513, %f35, %f178, %f519;
	mul.ftz.f32 	%f520, %f173, %f32;
	fma.rn.ftz.f32 	%f521, %f32, %f38, %f520;
	mul.ftz.f32 	%f522, %f175, %f521;
	fma.rn.ftz.f32 	%f512, %f36, %f178, %f522;
	mul.ftz.f32 	%f523, %f173, %f33;
	fma.rn.ftz.f32 	%f524, %f33, %f38, %f523;
	mul.ftz.f32 	%f525, %f175, %f524;
	fma.rn.ftz.f32 	%f511, %f37, %f178, %f525;
$Lt_126_285698:
	.loc	22	557	0
	mov.f32 	%f526, %f513;
	mov.f32 	%f527, %f512;
	mov.f32 	%f528, %f511;
	mov.f32 	%f529, %f507;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_303_54;
$Lt_126_239618:
	.loc	22	561	0
	// The %f31 side is not lower in luma: keep the %f35..%f38 pixel.
	mov.f32 	%f526, %f35;
	mov.f32 	%f527, %f36;
	mov.f32 	%f528, %f37;
	mov.f32 	%f529, %f38;
$LDWendi__Z10GetLuma6018PixelRGB_303_54:
	.loc	22	608	0
	mov.f32 	%f488, %f526;
	mov.f32 	%f489, %f527;
	mov.f32 	%f490, %f528;
	mov.f32 	%f491, %f529;
	bra.uni 	$Lt_126_285186;
$Lt_126_285442:
	.loc	22	569	0
	// 709 path: same computation as the 601 path with the other table.
	ld.const.f32 	%f530, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f531, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f532, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f533, %f32, %f532;
	fma.rn.ftz.f32 	%f534, %f531, %f33, %f533;
	fma.rn.ftz.f32 	%f535, %f530, %f31, %f534;
	cvt.ftz.sat.f32.f32 	%f536, %f535;
	mul.ftz.f32 	%f537, %f36, %f532;
	fma.rn.ftz.f32 	%f538, %f531, %f37, %f537;
	fma.rn.ftz.f32 	%f539, %f530, %f35, %f538;
	cvt.ftz.sat.f32.f32 	%f540, %f539;
	setp.lt.ftz.f32 	%p64, %f536, %f540;
	@!%p64 bra 	$Lt_126_240130;
	.loc	22	468	0
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f541, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f541;
	mov.f32 	%f542, %f164;
	mov.f32 	%f543, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f544, %f164, %f543;
	mov.f32 	%f545, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p65, %f544, %f545;
	@!%p65 bra 	$Lt_126_286466;
	// Combined alpha is (nearly) zero: emit an all-zero pixel.
	mov.f32 	%f546, 0f00000000;   	// 0
	mov.f32 	%f547, 0f00000000;   	// 0
	mov.f32 	%f548, 0f00000000;   	// 0
	mov.f32 	%f542, 0f00000000;   	// 0
	bra.uni 	$Lt_126_286210;
$Lt_126_286466:
	// %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f549, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f549, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f550, 0f3f800000;   	// 1
	mul.ftz.f32 	%f551, %f161, %f174;
	sub.ftz.f32 	%f178, %f550, %f551;
	mul.ftz.f32 	%f552, %f173, %f31;
	fma.rn.ftz.f32 	%f553, %f31, %f38, %f552;
	mul.ftz.f32 	%f554, %f175, %f553;
	fma.rn.ftz.f32 	%f548, %f35, %f178, %f554;
	mul.ftz.f32 	%f555, %f173, %f32;
	fma.rn.ftz.f32 	%f556, %f32, %f38, %f555;
	mul.ftz.f32 	%f557, %f175, %f556;
	fma.rn.ftz.f32 	%f547, %f36, %f178, %f557;
	mul.ftz.f32 	%f558, %f173, %f33;
	fma.rn.ftz.f32 	%f559, %f33, %f38, %f558;
	mul.ftz.f32 	%f560, %f175, %f559;
	fma.rn.ftz.f32 	%f546, %f37, %f178, %f560;
$Lt_126_286210:
	.loc	22	571	0
	mov.f32 	%f561, %f548;
	mov.f32 	%f562, %f547;
	mov.f32 	%f563, %f546;
	mov.f32 	%f564, %f542;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_303_52;
$Lt_126_240130:
	.loc	22	575	0
	mov.f32 	%f561, %f35;
	mov.f32 	%f562, %f36;
	mov.f32 	%f563, %f37;
	mov.f32 	%f564, %f38;
$LDWendi__Z10GetLuma7098PixelRGB_303_52:
	.loc	22	608	0
	mov.f32 	%f488, %f561;
	mov.f32 	%f489, %f562;
	mov.f32 	%f490, %f563;
	mov.f32 	%f491, %f564;
$Lt_126_285186:
$Lt_126_284674:
$Lt_126_284162:
	.loc	6	198	0
	// Join point of every path above: commit the selected pixel.
	mov.f32 	%f31, %f488;
	mov.f32 	%f32, %f489;
	mov.f32 	%f33, %f490;
	mov.f32 	%f34, %f491;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_2562: alpha-composite pixel (%f31,%f32,%f33,%f34) with pixel
// (%f35,%f36,%f37,%f38) using a per-channel blend term.
//   A = %f161 = alphaGain * %f34          B = %f38
//   U = %f164 = A + B - A*B               (output alpha)
//   U - 8e-6 <= 0 : result = (0,0,0,0)
//   otherwise, for each channel pair (p,q) in (%f31,%f35), (%f32,%f36), (%f33,%f37):
//     blend = clamp(clamp(q,0,1) / (1 - clamp(p,0,0.99999)), 0, 1)
//     out   = q*(1 - A/U) + (A/U) * (p*(1 - B) + B*blend)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): the blend term matches the usual "color dodge" formula if
// %f35..%f37 is the backdrop - confirm against the source at .loc 22 475.
// ---------------------------------------------------------------------------
$Lt_126_2562:
	.loc	22	475	0
	ld.param.f32 	%f565, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f565, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f566, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f566;
	mov.f32 	%f567, %f164;
	mov.f32 	%f568, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f569, %f164, %f568;
	mov.f32 	%f570, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p66, %f569, %f570;
	@!%p66 bra 	$Lt_126_286978;
	// Combined alpha is (nearly) zero: emit an all-zero pixel and skip the
	// reciprocal of %f164 below.
	mov.f32 	%f571, 0f00000000;   	// 0
	mov.f32 	%f572, 0f00000000;   	// 0
	mov.f32 	%f573, 0f00000000;   	// 0
	mov.f32 	%f567, 0f00000000;   	// 0
	bra.uni 	$Lt_126_286722;
$Lt_126_286978:
	// Weights shared by all three channels:
	//   %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f574, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f574, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f575, 0f3f800000;   	// 1
	mul.ftz.f32 	%f576, %f161, %f174;
	sub.ftz.f32 	%f178, %f575, %f576;
	// Channel pair %f31 / %f35 -> %f573
	mov.f32 	%f577, 0f00000000;   	// 0
	max.ftz.f32 	%f578, %f35, %f577;
	mov.f32 	%f579, 0f3f800000;   	// 1
	min.ftz.f32 	%f580, %f578, %f579;
	mov.f32 	%f581, 0f3f800000;   	// 1
	mov.f32 	%f582, 0f00000000;   	// 0
	max.ftz.f32 	%f583, %f31, %f582;
	// Capping at 0.99999 keeps the divisor (1 - p) strictly above zero.
	mov.f32 	%f584, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f585, %f583, %f584;
	sub.ftz.f32 	%f586, %f581, %f585;
	div.approx.ftz.f32 	%f587, %f580, %f586;
	mov.f32 	%f588, 0f00000000;   	// 0
	max.ftz.f32 	%f589, %f587, %f588;
	mov.f32 	%f590, 0f3f800000;   	// 1
	min.ftz.f32 	%f591, %f589, %f590;
	mul.ftz.f32 	%f592, %f38, %f591;
	fma.rn.ftz.f32 	%f593, %f31, %f173, %f592;
	mul.ftz.f32 	%f594, %f175, %f593;
	fma.rn.ftz.f32 	%f573, %f35, %f178, %f594;
	// Channel pair %f32 / %f36 -> %f572
	mov.f32 	%f595, 0f00000000;   	// 0
	max.ftz.f32 	%f596, %f36, %f595;
	mov.f32 	%f597, 0f3f800000;   	// 1
	min.ftz.f32 	%f598, %f596, %f597;
	mov.f32 	%f599, 0f3f800000;   	// 1
	mov.f32 	%f600, 0f00000000;   	// 0
	max.ftz.f32 	%f601, %f32, %f600;
	mov.f32 	%f602, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f603, %f601, %f602;
	sub.ftz.f32 	%f604, %f599, %f603;
	div.approx.ftz.f32 	%f605, %f598, %f604;
	mov.f32 	%f606, 0f00000000;   	// 0
	max.ftz.f32 	%f607, %f605, %f606;
	mov.f32 	%f608, 0f3f800000;   	// 1
	min.ftz.f32 	%f609, %f607, %f608;
	mul.ftz.f32 	%f610, %f38, %f609;
	fma.rn.ftz.f32 	%f611, %f32, %f173, %f610;
	mul.ftz.f32 	%f612, %f175, %f611;
	fma.rn.ftz.f32 	%f572, %f36, %f178, %f612;
	// Channel pair %f33 / %f37 -> %f571
	mov.f32 	%f613, 0f00000000;   	// 0
	max.ftz.f32 	%f614, %f37, %f613;
	mov.f32 	%f615, 0f3f800000;   	// 1
	min.ftz.f32 	%f616, %f614, %f615;
	mov.f32 	%f617, 0f3f800000;   	// 1
	mov.f32 	%f618, 0f00000000;   	// 0
	max.ftz.f32 	%f619, %f33, %f618;
	mov.f32 	%f620, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f621, %f619, %f620;
	sub.ftz.f32 	%f622, %f617, %f621;
	div.approx.ftz.f32 	%f623, %f616, %f622;
	mov.f32 	%f624, 0f00000000;   	// 0
	max.ftz.f32 	%f625, %f623, %f624;
	mov.f32 	%f626, 0f3f800000;   	// 1
	min.ftz.f32 	%f627, %f625, %f626;
	mul.ftz.f32 	%f628, %f38, %f627;
	fma.rn.ftz.f32 	%f629, %f33, %f173, %f628;
	mul.ftz.f32 	%f630, %f175, %f629;
	fma.rn.ftz.f32 	%f571, %f37, %f178, %f630;
$Lt_126_286722:
	.loc	6	199	0
	// Commit the composited pixel; %f567 is U, or 0 on the zero-alpha path.
	mov.f32 	%f31, %f573;
	mov.f32 	%f32, %f572;
	mov.f32 	%f33, %f571;
	mov.f32 	%f34, %f567;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_2818: alpha-composite pixel (%f31,%f32,%f33,%f34) with pixel
// (%f35,%f36,%f37,%f38) using a per-channel blend term.
//   A = %f161 = alphaGain * %f34          B = %f38
//   U = %f164 = A + B - A*B               (output alpha)
//   U - 8e-6 <= 0 : result = (0,0,0,0)
//   otherwise, for each channel pair (p,q) in (%f31,%f35), (%f32,%f36), (%f33,%f37):
//     blend = clamp(p + q, 0, 1)
//     out   = q*(1 - A/U) + (A/U) * (p*(1 - B) + B*blend)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): the blend term matches the usual additive / "linear dodge"
// formula - confirm against the source at .loc 22 476.
// ---------------------------------------------------------------------------
$Lt_126_2818:
	.loc	22	476	0
	ld.param.f32 	%f631, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f631, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f632, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f632;
	mov.f32 	%f633, %f164;
	mov.f32 	%f634, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f635, %f164, %f634;
	mov.f32 	%f636, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p67, %f635, %f636;
	@!%p67 bra 	$Lt_126_287490;
	// Combined alpha is (nearly) zero: emit an all-zero pixel and skip the
	// reciprocal of %f164 below.
	mov.f32 	%f637, 0f00000000;   	// 0
	mov.f32 	%f638, 0f00000000;   	// 0
	mov.f32 	%f639, 0f00000000;   	// 0
	mov.f32 	%f633, 0f00000000;   	// 0
	bra.uni 	$Lt_126_287234;
$Lt_126_287490:
	// Weights shared by all three channels:
	//   %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f640, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f640, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f641, 0f3f800000;   	// 1
	mul.ftz.f32 	%f642, %f161, %f174;
	sub.ftz.f32 	%f178, %f641, %f642;
	// Channel pair %f31 / %f35 -> %f639
	add.ftz.f32 	%f643, %f31, %f35;
	mov.f32 	%f644, 0f00000000;   	// 0
	max.ftz.f32 	%f645, %f643, %f644;
	mov.f32 	%f646, 0f3f800000;   	// 1
	min.ftz.f32 	%f647, %f645, %f646;
	mul.ftz.f32 	%f648, %f38, %f647;
	fma.rn.ftz.f32 	%f649, %f31, %f173, %f648;
	mul.ftz.f32 	%f650, %f175, %f649;
	fma.rn.ftz.f32 	%f639, %f35, %f178, %f650;
	// Channel pair %f32 / %f36 -> %f638
	add.ftz.f32 	%f651, %f32, %f36;
	mov.f32 	%f652, 0f00000000;   	// 0
	max.ftz.f32 	%f653, %f651, %f652;
	mov.f32 	%f654, 0f3f800000;   	// 1
	min.ftz.f32 	%f655, %f653, %f654;
	mul.ftz.f32 	%f656, %f38, %f655;
	fma.rn.ftz.f32 	%f657, %f32, %f173, %f656;
	mul.ftz.f32 	%f658, %f175, %f657;
	fma.rn.ftz.f32 	%f638, %f36, %f178, %f658;
	// Channel pair %f33 / %f37 -> %f637
	add.ftz.f32 	%f659, %f33, %f37;
	mov.f32 	%f660, 0f00000000;   	// 0
	max.ftz.f32 	%f661, %f659, %f660;
	mov.f32 	%f662, 0f3f800000;   	// 1
	min.ftz.f32 	%f663, %f661, %f662;
	mul.ftz.f32 	%f664, %f38, %f663;
	fma.rn.ftz.f32 	%f665, %f33, %f173, %f664;
	mul.ftz.f32 	%f666, %f175, %f665;
	fma.rn.ftz.f32 	%f637, %f37, %f178, %f666;
$Lt_126_287234:
	.loc	6	200	0
	// Commit the composited pixel; %f633 is U, or 0 on the zero-alpha path.
	mov.f32 	%f31, %f639;
	mov.f32 	%f32, %f638;
	mov.f32 	%f33, %f637;
	mov.f32 	%f34, %f633;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_3074: whole-pixel selection driven by a luma comparison.
// Same structure as the $Lt_126_2306 arm, but the comparison is "greater than".
//   A = %f161 = alphaGain * %f34          B = %f38
//   A - 8e-6 <= 0 : result = (%f35,%f36,%f37,%f38)
//   B - 8e-6 <= 0 : result = (%f31,%f32,%f33, A)
//   otherwise     : luma(x,y,z) = sat(k[+0]*z + k[+4]*y + k[+8]*x), where k is
//                   kRGB32f_To_601YPbPr when %r11 <= 720 (signed compare) and
//                   kRGB32f_To_709YPbPr when %r11 > 720.
//       luma(%f31,%f32,%f33) > luma(%f35,%f36,%f37):
//           composite with the blend term equal to the %f31-side channel
//           itself: out = q*(1 - A/U) + (A/U) * (p*(1 - B) + p*B),
//           with U = A + B - A*B (all zeros when U - 8e-6 <= 0)
//       else: result = (%f35,%f36,%f37,%f38)
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): behaves like a "lighter color" mode; presumably %r11 is an
// image dimension used to choose between the 601 and 709 tables - confirm both.
// ---------------------------------------------------------------------------
$Lt_126_3074:
	.loc	6	201	0
	ld.param.f32 	%f667, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f667, %f34;
	mov.f32 	%f668, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f669, %f161, %f668;
	mov.f32 	%f670, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p68, %f669, %f670;
	@!%p68 bra 	$Lt_126_288002;
	.loc	22	609	0
	// Gained alpha is (nearly) zero: keep the %f35..%f38 pixel unchanged.
	mov.f32 	%f671, %f35;
	mov.f32 	%f672, %f36;
	mov.f32 	%f673, %f37;
	mov.f32 	%f674, %f38;
	bra.uni 	$Lt_126_288770;
$Lt_126_288002:
	mov.f32 	%f675, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f676, %f38, %f675;
	mov.f32 	%f677, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p69, %f676, %f677;
	@!%p69 bra 	$Lt_126_288514;
	// %f38 is (nearly) zero: take the %f31..%f33 color with the gained alpha.
	mov.f32 	%f671, %f31;
	mov.f32 	%f672, %f32;
	mov.f32 	%f673, %f33;
	mov.f32 	%f674, %f161;
	bra.uni 	$Lt_126_288770;
$Lt_126_288514:
	// Choose the coefficient table: %r11 > 720 -> 709 path, else 601 path.
	mov.u32 	%r135, 720;
	setp.gt.s32 	%p70, %r11, %r135;
	@%p70 bra 	$Lt_126_289026;
	.loc	22	584	0
	// 601 path: %f681 = luma of (%f31,%f32,%f33), %f685 = luma of
	// (%f35,%f36,%f37); cvt.sat clamps each to [0,1].
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f678, %f32, %f497;
	fma.rn.ftz.f32 	%f679, %f496, %f33, %f678;
	fma.rn.ftz.f32 	%f680, %f495, %f31, %f679;
	cvt.ftz.sat.f32.f32 	%f681, %f680;
	mul.ftz.f32 	%f682, %f36, %f497;
	fma.rn.ftz.f32 	%f683, %f496, %f37, %f682;
	fma.rn.ftz.f32 	%f684, %f495, %f35, %f683;
	cvt.ftz.sat.f32.f32 	%f685, %f684;
	setp.gt.ftz.f32 	%p71, %f681, %f685;
	@!%p71 bra 	$Lt_126_241922;
	.loc	22	468	0
	// The %f31 side has the higher luma: composite it over the %f35 side.
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f686, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f686;
	mov.f32 	%f687, %f164;
	mov.f32 	%f688, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f689, %f164, %f688;
	mov.f32 	%f690, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p72, %f689, %f690;
	@!%p72 bra 	$Lt_126_289538;
	// Combined alpha is (nearly) zero: emit an all-zero pixel.
	mov.f32 	%f691, 0f00000000;   	// 0
	mov.f32 	%f692, 0f00000000;   	// 0
	mov.f32 	%f693, 0f00000000;   	// 0
	mov.f32 	%f687, 0f00000000;   	// 0
	bra.uni 	$Lt_126_289282;
$Lt_126_289538:
	// %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f694, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f694, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f695, 0f3f800000;   	// 1
	mul.ftz.f32 	%f696, %f161, %f174;
	sub.ftz.f32 	%f178, %f695, %f696;
	// Per channel: q*(1 - A/U) + (A/U) * (p*B + (1 - B)*p)
	mul.ftz.f32 	%f697, %f173, %f31;
	fma.rn.ftz.f32 	%f698, %f31, %f38, %f697;
	mul.ftz.f32 	%f699, %f175, %f698;
	fma.rn.ftz.f32 	%f693, %f35, %f178, %f699;
	mul.ftz.f32 	%f700, %f173, %f32;
	fma.rn.ftz.f32 	%f701, %f32, %f38, %f700;
	mul.ftz.f32 	%f702, %f175, %f701;
	fma.rn.ftz.f32 	%f692, %f36, %f178, %f702;
	mul.ftz.f32 	%f703, %f173, %f33;
	fma.rn.ftz.f32 	%f704, %f33, %f38, %f703;
	mul.ftz.f32 	%f705, %f175, %f704;
	fma.rn.ftz.f32 	%f691, %f37, %f178, %f705;
$Lt_126_289282:
	.loc	22	586	0
	mov.f32 	%f706, %f693;
	mov.f32 	%f707, %f692;
	mov.f32 	%f708, %f691;
	mov.f32 	%f709, %f687;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_303_50;
$Lt_126_241922:
	.loc	22	590	0
	// The %f31 side is not higher in luma: keep the %f35..%f38 pixel.
	mov.f32 	%f706, %f35;
	mov.f32 	%f707, %f36;
	mov.f32 	%f708, %f37;
	mov.f32 	%f709, %f38;
$LDWendi__Z10GetLuma6018PixelRGB_303_50:
	.loc	22	609	0
	mov.f32 	%f671, %f706;
	mov.f32 	%f672, %f707;
	mov.f32 	%f673, %f708;
	mov.f32 	%f674, %f709;
	bra.uni 	$Lt_126_288770;
$Lt_126_289026:
	.loc	22	598	0
	// 709 path: same computation as the 601 path with the other table.
	ld.const.f32 	%f710, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f711, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f712, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f713, %f32, %f712;
	fma.rn.ftz.f32 	%f714, %f711, %f33, %f713;
	fma.rn.ftz.f32 	%f715, %f710, %f31, %f714;
	cvt.ftz.sat.f32.f32 	%f716, %f715;
	mul.ftz.f32 	%f717, %f36, %f712;
	fma.rn.ftz.f32 	%f718, %f711, %f37, %f717;
	fma.rn.ftz.f32 	%f719, %f710, %f35, %f718;
	cvt.ftz.sat.f32.f32 	%f720, %f719;
	setp.gt.ftz.f32 	%p73, %f716, %f720;
	@!%p73 bra 	$Lt_126_242434;
	.loc	22	468	0
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f721, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f721;
	mov.f32 	%f722, %f164;
	mov.f32 	%f723, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f724, %f164, %f723;
	mov.f32 	%f725, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p74, %f724, %f725;
	@!%p74 bra 	$Lt_126_290050;
	// Combined alpha is (nearly) zero: emit an all-zero pixel.
	mov.f32 	%f726, 0f00000000;   	// 0
	mov.f32 	%f727, 0f00000000;   	// 0
	mov.f32 	%f728, 0f00000000;   	// 0
	mov.f32 	%f722, 0f00000000;   	// 0
	bra.uni 	$Lt_126_289794;
$Lt_126_290050:
	// %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f729, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f729, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f730, 0f3f800000;   	// 1
	mul.ftz.f32 	%f731, %f161, %f174;
	sub.ftz.f32 	%f178, %f730, %f731;
	mul.ftz.f32 	%f732, %f173, %f31;
	fma.rn.ftz.f32 	%f733, %f31, %f38, %f732;
	mul.ftz.f32 	%f734, %f175, %f733;
	fma.rn.ftz.f32 	%f728, %f35, %f178, %f734;
	mul.ftz.f32 	%f735, %f173, %f32;
	fma.rn.ftz.f32 	%f736, %f32, %f38, %f735;
	mul.ftz.f32 	%f737, %f175, %f736;
	fma.rn.ftz.f32 	%f727, %f36, %f178, %f737;
	mul.ftz.f32 	%f738, %f173, %f33;
	fma.rn.ftz.f32 	%f739, %f33, %f38, %f738;
	mul.ftz.f32 	%f740, %f175, %f739;
	fma.rn.ftz.f32 	%f726, %f37, %f178, %f740;
$Lt_126_289794:
	.loc	22	600	0
	mov.f32 	%f741, %f728;
	mov.f32 	%f742, %f727;
	mov.f32 	%f743, %f726;
	mov.f32 	%f744, %f722;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_303_48;
$Lt_126_242434:
	.loc	22	604	0
	mov.f32 	%f741, %f35;
	mov.f32 	%f742, %f36;
	mov.f32 	%f743, %f37;
	mov.f32 	%f744, %f38;
$LDWendi__Z10GetLuma7098PixelRGB_303_48:
	.loc	22	609	0
	mov.f32 	%f671, %f741;
	mov.f32 	%f672, %f742;
	mov.f32 	%f673, %f743;
	mov.f32 	%f674, %f744;
$Lt_126_288770:
$Lt_126_288258:
$Lt_126_287746:
	.loc	6	201	0
	// Join point of every path above: commit the selected pixel.
	mov.f32 	%f31, %f671;
	mov.f32 	%f32, %f672;
	mov.f32 	%f33, %f673;
	mov.f32 	%f34, %f674;
	bra.uni 	$Lt_126_317954;
// ---------------------------------------------------------------------------
// Case arm $Lt_126_3330: alpha-composite pixel (%f31,%f32,%f33,%f34) with pixel
// (%f35,%f36,%f37,%f38) using a piecewise per-channel blend term.
//   A = %f161 = alphaGain * %f34          B = %f38
//   U = %f164 = A + B - A*B               (output alpha)
//   U - 8e-6 <= 0 : result = (0,0,0,0)
//   otherwise, for each channel pair (p,q) in (%f31,%f35), (%f32,%f36), (%f33,%f37),
//   with pc = clamp(p,0,1) and qc = clamp(q,0,1):
//     blend = qc <= 0.5 ? qc * (2*pc)
//                       : 1 - (2*(1 - pc)) * (1 - qc)
//     out   = q*(1 - A/U) + (A/U) * (p*(1 - B) + B*clamp(blend,0,1))
// The result is written back to %f31..%f34 and control transfers to
// $Lt_126_317954, which lies outside this part of the file.
// NOTE(review): the piecewise multiply/screen form keyed on the %f35-side value
// matches the usual "overlay" formula if %f35..%f37 is the backdrop - confirm
// against the source at .loc 22 373 / 477.
// ---------------------------------------------------------------------------
$Lt_126_3330:
	.loc	22	477	0
	ld.param.f32 	%f745, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f745, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f746, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f746;
	mov.f32 	%f747, %f164;
	mov.f32 	%f748, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f749, %f164, %f748;
	mov.f32 	%f750, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p75, %f749, %f750;
	@!%p75 bra 	$Lt_126_290562;
	// Combined alpha is (nearly) zero: emit an all-zero pixel and skip the
	// reciprocal of %f164 below.
	mov.f32 	%f751, 0f00000000;   	// 0
	mov.f32 	%f752, 0f00000000;   	// 0
	mov.f32 	%f753, 0f00000000;   	// 0
	mov.f32 	%f747, 0f00000000;   	// 0
	bra.uni 	$Lt_126_290306;
$Lt_126_290562:
	.loc	22	373	0
	// Channel pair %f31 / %f35: %f759 = clamp(%f35,0,1), %f761 = clamp(%f31,0,1);
	// the piecewise blend term lands in %f764.
	mov.f32 	%f754, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f35, %f754;
	mov.f32 	%f756, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f31, %f756;
	mov.f32 	%f758, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f758;
	mov.f32 	%f760, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f760;
	mov.f32 	%f762, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p76, %f759, %f762;
	@!%p76 bra 	$Lt_126_291074;
	add.ftz.f32 	%f763, %f761, %f761;
	mul.ftz.f32 	%f764, %f759, %f763;
	bra.uni 	$Lt_126_290818;
$Lt_126_291074:
	mov.f32 	%f765, 0f3f800000;   	// 1
	sub.ftz.f32 	%f766, %f765, %f761;
	mov.f32 	%f767, 0f3f800000;   	// 1
	add.ftz.f32 	%f768, %f766, %f766;
	mov.f32 	%f769, 0f3f800000;   	// 1
	sub.ftz.f32 	%f770, %f769, %f759;
	mul.ftz.f32 	%f771, %f768, %f770;
	sub.ftz.f32 	%f764, %f767, %f771;
$Lt_126_290818:
	.loc	22	477	0
	// Weights shared by all three channels (computed once here, reused below):
	//   %f173 = 1 - B,  %f175 = A/U,  %f178 = 1 - A/U
	mov.f32 	%f772, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f772, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f773, 0f3f800000;   	// 1
	mul.ftz.f32 	%f774, %f161, %f174;
	sub.ftz.f32 	%f178, %f773, %f774;
	mov.f32 	%f775, 0f00000000;   	// 0
	max.ftz.f32 	%f776, %f764, %f775;
	mov.f32 	%f777, 0f3f800000;   	// 1
	min.ftz.f32 	%f778, %f776, %f777;
	mul.ftz.f32 	%f779, %f38, %f778;
	fma.rn.ftz.f32 	%f780, %f31, %f173, %f779;
	mul.ftz.f32 	%f781, %f175, %f780;
	fma.rn.ftz.f32 	%f753, %f35, %f178, %f781;
	.loc	22	373	0
	// Channel pair %f32 / %f36: blend term lands in %f792.
	mov.f32 	%f782, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f36, %f782;
	mov.f32 	%f784, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f32, %f784;
	mov.f32 	%f786, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f786;
	mov.f32 	%f788, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f788;
	mov.f32 	%f790, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p77, %f787, %f790;
	@!%p77 bra 	$Lt_126_291586;
	add.ftz.f32 	%f791, %f789, %f789;
	mul.ftz.f32 	%f792, %f787, %f791;
	bra.uni 	$Lt_126_291330;
$Lt_126_291586:
	mov.f32 	%f793, 0f3f800000;   	// 1
	sub.ftz.f32 	%f794, %f793, %f789;
	mov.f32 	%f795, 0f3f800000;   	// 1
	add.ftz.f32 	%f796, %f794, %f794;
	mov.f32 	%f797, 0f3f800000;   	// 1
	sub.ftz.f32 	%f798, %f797, %f787;
	mul.ftz.f32 	%f799, %f796, %f798;
	sub.ftz.f32 	%f792, %f795, %f799;
$Lt_126_291330:
	.loc	22	477	0
	mov.f32 	%f800, 0f00000000;   	// 0
	max.ftz.f32 	%f801, %f792, %f800;
	mov.f32 	%f802, 0f3f800000;   	// 1
	min.ftz.f32 	%f803, %f801, %f802;
	mul.ftz.f32 	%f804, %f38, %f803;
	fma.rn.ftz.f32 	%f805, %f32, %f173, %f804;
	mul.ftz.f32 	%f806, %f175, %f805;
	fma.rn.ftz.f32 	%f752, %f36, %f178, %f806;
	.loc	22	373	0
	// Channel pair %f33 / %f37: blend term lands in %f817.
	mov.f32 	%f807, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f37, %f807;
	mov.f32 	%f809, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f33, %f809;
	mov.f32 	%f811, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f811;
	mov.f32 	%f813, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f813;
	mov.f32 	%f815, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p78, %f812, %f815;
	@!%p78 bra 	$Lt_126_292098;
	add.ftz.f32 	%f816, %f814, %f814;
	mul.ftz.f32 	%f817, %f812, %f816;
	bra.uni 	$Lt_126_291842;
$Lt_126_292098:
	mov.f32 	%f818, 0f3f800000;   	// 1
	sub.ftz.f32 	%f819, %f818, %f814;
	mov.f32 	%f820, 0f3f800000;   	// 1
	add.ftz.f32 	%f821, %f819, %f819;
	mov.f32 	%f822, 0f3f800000;   	// 1
	sub.ftz.f32 	%f823, %f822, %f812;
	mul.ftz.f32 	%f824, %f821, %f823;
	sub.ftz.f32 	%f817, %f820, %f824;
$Lt_126_291842:
	.loc	22	477	0
	mov.f32 	%f825, 0f00000000;   	// 0
	max.ftz.f32 	%f826, %f817, %f825;
	mov.f32 	%f827, 0f3f800000;   	// 1
	min.ftz.f32 	%f828, %f826, %f827;
	mul.ftz.f32 	%f829, %f38, %f828;
	fma.rn.ftz.f32 	%f830, %f33, %f173, %f829;
	mul.ftz.f32 	%f831, %f175, %f830;
	fma.rn.ftz.f32 	%f751, %f37, %f178, %f831;
$Lt_126_290306:
	.loc	6	202	0
	// Commit the composited pixel; %f747 is U, or 0 on the zero-alpha path.
	mov.f32 	%f31, %f753;
	mov.f32 	%f32, %f752;
	mov.f32 	%f33, %f751;
	mov.f32 	%f34, %f747;
	bra.uni 	$Lt_126_317954;
$Lt_126_3586:
	// Branch arm tagged with source position file 22, line 478 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * clamp(B, 0, 1))
	//     with cx = clamp(x, 0, 1), cy = clamp(y, 0, 1) and
	//         B = cy + (2*cx - 1) * (cy - cy*cy)      when cx <= 0.5
	//         B = cy + (2*cx - 1) * (sqrt(cy) - cy)   otherwise
	//     a/A is formed with rcp.approx, sqrt with sqrt.approx.
	// NOTE(review): B has the shape of a "soft light" blend -- confirm.
	.loc	22	478	0
	ld.param.f32 	%f832, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f832, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f833, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f833;
	mov.f32 	%f834, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f835, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f836, %f164, %f835;
	mov.f32 	%f837, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p79, %f836, %f837;
	@!%p79 bra 	$Lt_126_292610;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f838, 0f00000000;   	// 0
	mov.f32 	%f839, 0f00000000;   	// 0
	mov.f32 	%f840, 0f00000000;   	// 0
	mov.f32 	%f834, 0f00000000;   	// 0
	bra.uni 	$Lt_126_292354;
$Lt_126_292610:
	// ---- first channel: x = %f31, y = %f35 ----
	.loc	22	380	0
	mov.f32 	%f841, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f35, %f841;
	mov.f32 	%f842, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f31, %f842;
	mov.f32 	%f843, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f843;
	mov.f32 	%f844, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f844;
	// %f847 = 2*cx - 1
	add.ftz.f32 	%f845, %f761, %f761;
	mov.f32 	%f846, 0fbf800000;   	// -1
	add.ftz.f32 	%f847, %f845, %f846;
	mov.f32 	%f848, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p80, %f761, %f848;
	@!%p80 bra 	$Lt_126_293122;
	// cx <= 0.5 : B = cy + (2*cx - 1) * (cy - cy*cy)
	mul.ftz.f32 	%f849, %f759, %f759;
	sub.ftz.f32 	%f850, %f759, %f849;
	fma.rn.ftz.f32 	%f851, %f847, %f850, %f759;
	bra.uni 	$Lt_126_292866;
$Lt_126_293122:
	// cx > 0.5 : B = cy + (2*cx - 1) * (sqrt(cy) - cy)
	sqrt.approx.ftz.f32 	%f852, %f759;
	sub.ftz.f32 	%f853, %f852, %f759;
	fma.rn.ftz.f32 	%f851, %f847, %f853, %f759;
$Lt_126_292866:
	.loc	22	478	0
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f854, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f854, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f855, 0f3f800000;   	// 1
	mul.ftz.f32 	%f856, %f161, %f174;
	sub.ftz.f32 	%f178, %f855, %f856;
	mov.f32 	%f857, 0f00000000;   	// 0
	max.ftz.f32 	%f858, %f851, %f857;
	mov.f32 	%f859, 0f3f800000;   	// 1
	min.ftz.f32 	%f860, %f858, %f859;
	mul.ftz.f32 	%f861, %f38, %f860;
	fma.rn.ftz.f32 	%f862, %f31, %f173, %f861;
	mul.ftz.f32 	%f863, %f175, %f862;
	fma.rn.ftz.f32 	%f840, %f35, %f178, %f863;
	// ---- second channel: x = %f32, y = %f36 ----
	.loc	22	380	0
	mov.f32 	%f864, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f36, %f864;
	mov.f32 	%f865, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f32, %f865;
	mov.f32 	%f866, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f866;
	mov.f32 	%f867, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f867;
	add.ftz.f32 	%f868, %f789, %f789;
	mov.f32 	%f869, 0fbf800000;   	// -1
	add.ftz.f32 	%f870, %f868, %f869;
	mov.f32 	%f871, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p81, %f789, %f871;
	@!%p81 bra 	$Lt_126_293634;
	mul.ftz.f32 	%f872, %f787, %f787;
	sub.ftz.f32 	%f873, %f787, %f872;
	fma.rn.ftz.f32 	%f874, %f870, %f873, %f787;
	bra.uni 	$Lt_126_293378;
$Lt_126_293634:
	sqrt.approx.ftz.f32 	%f875, %f787;
	sub.ftz.f32 	%f876, %f875, %f787;
	fma.rn.ftz.f32 	%f874, %f870, %f876, %f787;
$Lt_126_293378:
	.loc	22	478	0
	mov.f32 	%f877, 0f00000000;   	// 0
	max.ftz.f32 	%f878, %f874, %f877;
	mov.f32 	%f879, 0f3f800000;   	// 1
	min.ftz.f32 	%f880, %f878, %f879;
	mul.ftz.f32 	%f881, %f38, %f880;
	fma.rn.ftz.f32 	%f882, %f32, %f173, %f881;
	mul.ftz.f32 	%f883, %f175, %f882;
	fma.rn.ftz.f32 	%f839, %f36, %f178, %f883;
	// ---- third channel: x = %f33, y = %f37 ----
	.loc	22	380	0
	mov.f32 	%f884, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f37, %f884;
	mov.f32 	%f885, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f33, %f885;
	mov.f32 	%f886, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f886;
	mov.f32 	%f887, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f887;
	add.ftz.f32 	%f888, %f814, %f814;
	mov.f32 	%f889, 0fbf800000;   	// -1
	add.ftz.f32 	%f890, %f888, %f889;
	mov.f32 	%f891, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p82, %f814, %f891;
	@!%p82 bra 	$Lt_126_294146;
	mul.ftz.f32 	%f892, %f812, %f812;
	sub.ftz.f32 	%f893, %f812, %f892;
	fma.rn.ftz.f32 	%f894, %f890, %f893, %f812;
	bra.uni 	$Lt_126_293890;
$Lt_126_294146:
	sqrt.approx.ftz.f32 	%f895, %f812;
	sub.ftz.f32 	%f896, %f895, %f812;
	fma.rn.ftz.f32 	%f894, %f890, %f896, %f812;
$Lt_126_293890:
	.loc	22	478	0
	mov.f32 	%f897, 0f00000000;   	// 0
	max.ftz.f32 	%f898, %f894, %f897;
	mov.f32 	%f899, 0f3f800000;   	// 1
	min.ftz.f32 	%f900, %f898, %f899;
	mul.ftz.f32 	%f901, %f38, %f900;
	fma.rn.ftz.f32 	%f902, %f33, %f173, %f901;
	mul.ftz.f32 	%f903, %f175, %f902;
	fma.rn.ftz.f32 	%f838, %f37, %f178, %f903;
$Lt_126_292354:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	203	0
	mov.f32 	%f31, %f840;
	mov.f32 	%f32, %f839;
	mov.f32 	%f33, %f838;
	mov.f32 	%f34, %f834;
	bra.uni 	$Lt_126_317954;
$Lt_126_3842:
	// Branch arm tagged with source position file 22, line 479 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * clamp(B, 0, 1))
	//     with cx = clamp(x, 0, 1), cy = clamp(y, 0, 1) and
	//         B = 2 * cx * cy                      when cx <= 0.5
	//         B = 1 - 2 * (1 - cx) * (1 - cy)      otherwise
	//     a/A is formed with rcp.approx.
	// NOTE(review): B has the shape of an "overlay"/"hard light" blend -- confirm.
	.loc	22	479	0
	ld.param.f32 	%f904, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f904, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f905, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f905;
	mov.f32 	%f906, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f907, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f908, %f164, %f907;
	mov.f32 	%f909, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p83, %f908, %f909;
	@!%p83 bra 	$Lt_126_294658;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f910, 0f00000000;   	// 0
	mov.f32 	%f911, 0f00000000;   	// 0
	mov.f32 	%f912, 0f00000000;   	// 0
	mov.f32 	%f906, 0f00000000;   	// 0
	bra.uni 	$Lt_126_294402;
$Lt_126_294658:
	// ---- first channel: x = %f31, y = %f35 ----
	.loc	22	386	0
	mov.f32 	%f913, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f35, %f913;
	mov.f32 	%f914, 0f00000000;   	// 0
	max.ftz.f32 	%f757, %f31, %f914;
	mov.f32 	%f915, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f915;
	mov.f32 	%f916, 0f3f800000;   	// 1
	min.ftz.f32 	%f761, %f757, %f916;
	mov.f32 	%f917, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p84, %f761, %f917;
	@!%p84 bra 	$Lt_126_295170;
	// cx <= 0.5 : B = cy * (2 * cx)
	add.ftz.f32 	%f918, %f761, %f761;
	mul.ftz.f32 	%f919, %f759, %f918;
	bra.uni 	$Lt_126_294914;
$Lt_126_295170:
	// cx > 0.5 : B = 1 - 2 * (1 - cx) * (1 - cy)
	mov.f32 	%f920, 0f3f800000;   	// 1
	sub.ftz.f32 	%f921, %f920, %f761;
	mov.f32 	%f922, 0f3f800000;   	// 1
	add.ftz.f32 	%f923, %f921, %f921;
	mov.f32 	%f924, 0f3f800000;   	// 1
	sub.ftz.f32 	%f925, %f924, %f759;
	mul.ftz.f32 	%f926, %f923, %f925;
	sub.ftz.f32 	%f919, %f922, %f926;
$Lt_126_294914:
	.loc	22	479	0
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f927, 0f3f800000;   	// 1
	sub.ftz.f32 	%f173, %f927, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f928, 0f3f800000;   	// 1
	mul.ftz.f32 	%f929, %f161, %f174;
	sub.ftz.f32 	%f178, %f928, %f929;
	mov.f32 	%f930, 0f00000000;   	// 0
	max.ftz.f32 	%f931, %f919, %f930;
	mov.f32 	%f932, 0f3f800000;   	// 1
	min.ftz.f32 	%f933, %f931, %f932;
	mul.ftz.f32 	%f934, %f38, %f933;
	fma.rn.ftz.f32 	%f935, %f31, %f173, %f934;
	mul.ftz.f32 	%f936, %f175, %f935;
	fma.rn.ftz.f32 	%f912, %f35, %f178, %f936;
	// ---- second channel: x = %f32, y = %f36 ----
	.loc	22	386	0
	mov.f32 	%f937, 0f00000000;   	// 0
	max.ftz.f32 	%f783, %f36, %f937;
	mov.f32 	%f938, 0f00000000;   	// 0
	max.ftz.f32 	%f785, %f32, %f938;
	mov.f32 	%f939, 0f3f800000;   	// 1
	min.ftz.f32 	%f787, %f783, %f939;
	mov.f32 	%f940, 0f3f800000;   	// 1
	min.ftz.f32 	%f789, %f785, %f940;
	mov.f32 	%f941, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p85, %f789, %f941;
	@!%p85 bra 	$Lt_126_295682;
	add.ftz.f32 	%f942, %f789, %f789;
	mul.ftz.f32 	%f943, %f787, %f942;
	bra.uni 	$Lt_126_295426;
$Lt_126_295682:
	mov.f32 	%f944, 0f3f800000;   	// 1
	sub.ftz.f32 	%f945, %f944, %f789;
	mov.f32 	%f946, 0f3f800000;   	// 1
	add.ftz.f32 	%f947, %f945, %f945;
	mov.f32 	%f948, 0f3f800000;   	// 1
	sub.ftz.f32 	%f949, %f948, %f787;
	mul.ftz.f32 	%f950, %f947, %f949;
	sub.ftz.f32 	%f943, %f946, %f950;
$Lt_126_295426:
	.loc	22	479	0
	mov.f32 	%f951, 0f00000000;   	// 0
	max.ftz.f32 	%f952, %f943, %f951;
	mov.f32 	%f953, 0f3f800000;   	// 1
	min.ftz.f32 	%f954, %f952, %f953;
	mul.ftz.f32 	%f955, %f38, %f954;
	fma.rn.ftz.f32 	%f956, %f32, %f173, %f955;
	mul.ftz.f32 	%f957, %f175, %f956;
	fma.rn.ftz.f32 	%f911, %f36, %f178, %f957;
	// ---- third channel: x = %f33, y = %f37 ----
	.loc	22	386	0
	mov.f32 	%f958, 0f00000000;   	// 0
	max.ftz.f32 	%f808, %f37, %f958;
	mov.f32 	%f959, 0f00000000;   	// 0
	max.ftz.f32 	%f810, %f33, %f959;
	mov.f32 	%f960, 0f3f800000;   	// 1
	min.ftz.f32 	%f812, %f808, %f960;
	mov.f32 	%f961, 0f3f800000;   	// 1
	min.ftz.f32 	%f814, %f810, %f961;
	mov.f32 	%f962, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p86, %f814, %f962;
	@!%p86 bra 	$Lt_126_296194;
	add.ftz.f32 	%f963, %f814, %f814;
	mul.ftz.f32 	%f964, %f812, %f963;
	bra.uni 	$Lt_126_295938;
$Lt_126_296194:
	mov.f32 	%f965, 0f3f800000;   	// 1
	sub.ftz.f32 	%f966, %f965, %f814;
	mov.f32 	%f967, 0f3f800000;   	// 1
	add.ftz.f32 	%f968, %f966, %f966;
	mov.f32 	%f969, 0f3f800000;   	// 1
	sub.ftz.f32 	%f970, %f969, %f812;
	mul.ftz.f32 	%f971, %f968, %f970;
	sub.ftz.f32 	%f964, %f967, %f971;
$Lt_126_295938:
	.loc	22	479	0
	mov.f32 	%f972, 0f00000000;   	// 0
	max.ftz.f32 	%f973, %f964, %f972;
	mov.f32 	%f974, 0f3f800000;   	// 1
	min.ftz.f32 	%f975, %f973, %f974;
	mul.ftz.f32 	%f976, %f38, %f975;
	fma.rn.ftz.f32 	%f977, %f33, %f173, %f976;
	mul.ftz.f32 	%f978, %f175, %f977;
	fma.rn.ftz.f32 	%f910, %f37, %f178, %f978;
$Lt_126_294402:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	204	0
	mov.f32 	%f31, %f912;
	mov.f32 	%f32, %f911;
	mov.f32 	%f33, %f910;
	mov.f32 	%f34, %f906;
	bra.uni 	$Lt_126_317954;
$Lt_126_4098:
	// Branch arm tagged with source position file 22, line 480 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * B)
	//     with cy = clamp(y, 0, 1), d = min(max(x, 1e-6), 0.999999) and
	//         B = clamp(1 - (1 - cy) / (2 * d), 0, 1)   when d <= 0.5
	//         B = clamp(cy / (2 * (1 - d)), 0, 1)       otherwise
	//     Divisions use div.approx, a/A uses rcp.approx.  The bounds on d keep
	//     both divisors (2*d and 2*(1-d)) away from zero.
	// NOTE(review): B has the shape of a "vivid light" blend -- confirm.
	.loc	22	480	0
	ld.param.f32 	%f979, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f979, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f980, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f980;
	mov.f32 	%f981, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f982, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f983, %f164, %f982;
	mov.f32 	%f984, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p87, %f983, %f984;
	@!%p87 bra 	$Lt_126_296706;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f985, 0f00000000;   	// 0
	mov.f32 	%f986, 0f00000000;   	// 0
	mov.f32 	%f987, 0f00000000;   	// 0
	mov.f32 	%f981, 0f00000000;   	// 0
	bra.uni 	$Lt_126_296450;
$Lt_126_296706:
	// ---- first channel: x = %f31, y = %f35 ----
	.loc	22	431	0
	mov.f32 	%f988, 0f00000000;   	// 0
	max.ftz.f32 	%f755, %f35, %f988;
	mov.f32 	%f989, 0f358637bd;   	// 1e-006
	max.ftz.f32 	%f990, %f31, %f989;
	mov.f32 	%f991, 0f3f800000;   	// 1
	min.ftz.f32 	%f759, %f755, %f991;
	mov.f32 	%f992, 0f3f7fffef;   	// 0.999999
	min.ftz.f32 	%f993, %f990, %f992;
	mov.f32 	%f994, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p88, %f993, %f994;
	@!%p88 bra 	$Lt_126_246274;
	// d <= 0.5 : B = clamp(1 - (1 - cy) / (2 * d), 0, 1)
	.loc	22	433	0
	mov.f32 	%f995, 0f3f800000;   	// 1
	mov.f32 	%f996, 0f3f800000;   	// 1
	sub.ftz.f32 	%f997, %f996, %f759;
	add.ftz.f32 	%f998, %f993, %f993;
	div.approx.ftz.f32 	%f999, %f997, %f998;
	sub.ftz.f32 	%f1000, %f995, %f999;
	mov.f32 	%f1001, 0f00000000;  	// 0
	max.ftz.f32 	%f1002, %f1000, %f1001;
	mov.f32 	%f1003, 0f3f800000;  	// 1
	min.ftz.f32 	%f1004, %f1002, %f1003;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_46;
$Lt_126_246274:
	// d > 0.5 : B = clamp(cy / (2 * (1 - d)), 0, 1)
	.loc	22	437	0
	mov.f32 	%f1005, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1006, %f1005, %f993;
	add.ftz.f32 	%f1007, %f1006, %f1006;
	div.approx.ftz.f32 	%f1008, %f759, %f1007;
	mov.f32 	%f1009, 0f00000000;  	// 0
	max.ftz.f32 	%f1010, %f1008, %f1009;
	mov.f32 	%f1011, 0f3f800000;  	// 1
	min.ftz.f32 	%f1004, %f1010, %f1011;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_46:
	.loc	22	480	0
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1012, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1012, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1013, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1014, %f161, %f174;
	sub.ftz.f32 	%f178, %f1013, %f1014;
	mul.ftz.f32 	%f1015, %f1004, %f38;
	fma.rn.ftz.f32 	%f1016, %f31, %f173, %f1015;
	mul.ftz.f32 	%f1017, %f175, %f1016;
	fma.rn.ftz.f32 	%f987, %f35, %f178, %f1017;
	// ---- second channel: x = %f32, y = %f36 ----
	.loc	22	431	0
	mov.f32 	%f1018, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f36, %f1018;
	mov.f32 	%f1019, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1020, %f32, %f1019;
	mov.f32 	%f1021, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1021;
	mov.f32 	%f1022, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1023, %f1020, %f1022;
	mov.f32 	%f1024, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p89, %f1023, %f1024;
	@!%p89 bra 	$Lt_126_246530;
	.loc	22	433	0
	mov.f32 	%f1025, 0f3f800000;  	// 1
	mov.f32 	%f1026, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1027, %f1026, %f787;
	add.ftz.f32 	%f1028, %f1023, %f1023;
	div.approx.ftz.f32 	%f1029, %f1027, %f1028;
	sub.ftz.f32 	%f1030, %f1025, %f1029;
	mov.f32 	%f1031, 0f00000000;  	// 0
	max.ftz.f32 	%f1032, %f1030, %f1031;
	mov.f32 	%f1033, 0f3f800000;  	// 1
	min.ftz.f32 	%f1034, %f1032, %f1033;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_44;
$Lt_126_246530:
	.loc	22	437	0
	mov.f32 	%f1035, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1036, %f1035, %f1023;
	add.ftz.f32 	%f1037, %f1036, %f1036;
	div.approx.ftz.f32 	%f1038, %f787, %f1037;
	mov.f32 	%f1039, 0f00000000;  	// 0
	max.ftz.f32 	%f1040, %f1038, %f1039;
	mov.f32 	%f1041, 0f3f800000;  	// 1
	min.ftz.f32 	%f1034, %f1040, %f1041;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_44:
	.loc	22	480	0
	mul.ftz.f32 	%f1042, %f1034, %f38;
	fma.rn.ftz.f32 	%f1043, %f32, %f173, %f1042;
	mul.ftz.f32 	%f1044, %f175, %f1043;
	fma.rn.ftz.f32 	%f986, %f36, %f178, %f1044;
	// ---- third channel: x = %f33, y = %f37 ----
	.loc	22	431	0
	mov.f32 	%f1045, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f37, %f1045;
	mov.f32 	%f1046, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1047, %f33, %f1046;
	mov.f32 	%f1048, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1048;
	mov.f32 	%f1049, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1050, %f1047, %f1049;
	mov.f32 	%f1051, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p90, %f1050, %f1051;
	@!%p90 bra 	$Lt_126_246786;
	.loc	22	433	0
	mov.f32 	%f1052, 0f3f800000;  	// 1
	mov.f32 	%f1053, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1054, %f1053, %f812;
	add.ftz.f32 	%f1055, %f1050, %f1050;
	div.approx.ftz.f32 	%f1056, %f1054, %f1055;
	sub.ftz.f32 	%f1057, %f1052, %f1056;
	mov.f32 	%f1058, 0f00000000;  	// 0
	max.ftz.f32 	%f1059, %f1057, %f1058;
	mov.f32 	%f1060, 0f3f800000;  	// 1
	min.ftz.f32 	%f1061, %f1059, %f1060;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_42;
$Lt_126_246786:
	.loc	22	437	0
	mov.f32 	%f1062, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1063, %f1062, %f1050;
	add.ftz.f32 	%f1064, %f1063, %f1063;
	div.approx.ftz.f32 	%f1065, %f812, %f1064;
	mov.f32 	%f1066, 0f00000000;  	// 0
	max.ftz.f32 	%f1067, %f1065, %f1066;
	mov.f32 	%f1068, 0f3f800000;  	// 1
	min.ftz.f32 	%f1061, %f1067, %f1068;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_42:
	.loc	22	480	0
	mul.ftz.f32 	%f1069, %f1061, %f38;
	fma.rn.ftz.f32 	%f1070, %f33, %f173, %f1069;
	mul.ftz.f32 	%f1071, %f175, %f1070;
	fma.rn.ftz.f32 	%f985, %f37, %f178, %f1071;
$Lt_126_296450:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	205	0
	mov.f32 	%f31, %f987;
	mov.f32 	%f32, %f986;
	mov.f32 	%f33, %f985;
	mov.f32 	%f34, %f981;
	bra.uni 	$Lt_126_317954;
$Lt_126_4354:
	// Branch arm tagged with source position file 22, line 481 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * B)
	//     with cx = clamp(x, 0, 1), cy = clamp(y, 0, 1) and
	//         B = 2 * cx + cy - 1
	//     Unlike the inputs, B itself is not clamped to [0, 1] in this arm.
	// NOTE(review): B has the shape of a "linear light" blend -- confirm.
	.loc	22	481	0
	ld.param.f32 	%f1072, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1072, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1073, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1073;
	mov.f32 	%f1074, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1075, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1076, %f164, %f1075;
	mov.f32 	%f1077, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p91, %f1076, %f1077;
	@!%p91 bra 	$Lt_126_297218;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1078, 0f00000000;  	// 0
	mov.f32 	%f1079, 0f00000000;  	// 0
	mov.f32 	%f1080, 0f00000000;  	// 0
	mov.f32 	%f1074, 0f00000000;  	// 0
	bra.uni 	$Lt_126_296962;
$Lt_126_297218:
	// Shared factors (%f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A) are
	// interleaved with the first channel's clamp of x = %f31.
	mov.f32 	%f1081, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1081, %f38;
	mov.f32 	%f1082, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f31, %f1082;
	mov.f32 	%f1083, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1083;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1084, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1085, %f161, %f174;
	sub.ftz.f32 	%f178, %f1084, %f1085;
	// ---- first channel: B = 2*clamp(%f31) + clamp(%f35) - 1 ----
	add.ftz.f32 	%f1086, %f761, %f761;
	mov.f32 	%f1087, 0f00000000;  	// 0
	max.ftz.f32 	%f1088, %f35, %f1087;
	mov.f32 	%f1089, 0f3f800000;  	// 1
	min.ftz.f32 	%f1090, %f1088, %f1089;
	add.ftz.f32 	%f1091, %f1086, %f1090;
	mov.f32 	%f1092, 0fbf800000;  	// -1
	add.ftz.f32 	%f1093, %f1091, %f1092;
	mul.ftz.f32 	%f1094, %f38, %f1093;
	fma.rn.ftz.f32 	%f1095, %f31, %f173, %f1094;
	mul.ftz.f32 	%f1096, %f175, %f1095;
	fma.rn.ftz.f32 	%f1080, %f35, %f178, %f1096;
	// ---- second channel: x = %f32, y = %f36 ----
	mov.f32 	%f1097, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f32, %f1097;
	mov.f32 	%f1098, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1098;
	add.ftz.f32 	%f1099, %f789, %f789;
	mov.f32 	%f1100, 0f00000000;  	// 0
	max.ftz.f32 	%f1101, %f36, %f1100;
	mov.f32 	%f1102, 0f3f800000;  	// 1
	min.ftz.f32 	%f1103, %f1101, %f1102;
	add.ftz.f32 	%f1104, %f1099, %f1103;
	mov.f32 	%f1105, 0fbf800000;  	// -1
	add.ftz.f32 	%f1106, %f1104, %f1105;
	mul.ftz.f32 	%f1107, %f38, %f1106;
	fma.rn.ftz.f32 	%f1108, %f32, %f173, %f1107;
	mul.ftz.f32 	%f1109, %f175, %f1108;
	fma.rn.ftz.f32 	%f1079, %f36, %f178, %f1109;
	// ---- third channel: x = %f33, y = %f37 ----
	mov.f32 	%f1110, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f33, %f1110;
	mov.f32 	%f1111, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1111;
	add.ftz.f32 	%f1112, %f814, %f814;
	mov.f32 	%f1113, 0f00000000;  	// 0
	max.ftz.f32 	%f1114, %f37, %f1113;
	mov.f32 	%f1115, 0f3f800000;  	// 1
	min.ftz.f32 	%f1116, %f1114, %f1115;
	add.ftz.f32 	%f1117, %f1112, %f1116;
	mov.f32 	%f1118, 0fbf800000;  	// -1
	add.ftz.f32 	%f1119, %f1117, %f1118;
	mul.ftz.f32 	%f1120, %f38, %f1119;
	fma.rn.ftz.f32 	%f1121, %f33, %f173, %f1120;
	mul.ftz.f32 	%f1122, %f175, %f1121;
	fma.rn.ftz.f32 	%f1078, %f37, %f178, %f1122;
$Lt_126_296962:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	206	0
	mov.f32 	%f31, %f1080;
	mov.f32 	%f32, %f1079;
	mov.f32 	%f33, %f1078;
	mov.f32 	%f34, %f1074;
	bra.uni 	$Lt_126_317954;
$Lt_126_4610:
	// Branch arm tagged with source position file 22, line 482 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * B)
	//     with cx = clamp(x, 0, 1), cy = clamp(y, 0, 1) and
	//         B = 2*cx - 1   when 2*cx - 1 > cy
	//         B = 2*cx       else when 2*cx < cy
	//         B = cy         otherwise
	// NOTE(review): B has the shape of a "pin light" blend -- confirm.
	.loc	22	482	0
	ld.param.f32 	%f1123, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1123, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1124, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1124;
	mov.f32 	%f1125, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1126, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1127, %f164, %f1126;
	mov.f32 	%f1128, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p92, %f1127, %f1128;
	@!%p92 bra 	$Lt_126_297730;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1129, 0f00000000;  	// 0
	mov.f32 	%f1130, 0f00000000;  	// 0
	mov.f32 	%f1131, 0f00000000;  	// 0
	mov.f32 	%f1125, 0f00000000;  	// 0
	bra.uni 	$Lt_126_297474;
$Lt_126_297730:
	// ---- first channel: x = %f31, y = %f35 ----
	.loc	22	450	0
	mov.f32 	%f1132, 0f00000000;  	// 0
	max.ftz.f32 	%f755, %f35, %f1132;
	mov.f32 	%f1133, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f31, %f1133;
	mov.f32 	%f1134, 0f3f800000;  	// 1
	min.ftz.f32 	%f759, %f755, %f1134;
	mov.f32 	%f1135, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1135;
	// %f1136 = 2*cx, %f1138 = 2*cx - 1
	add.ftz.f32 	%f1136, %f761, %f761;
	mov.f32 	%f1137, 0fbf800000;  	// -1
	add.ftz.f32 	%f1138, %f1136, %f1137;
	setp.gt.ftz.f32 	%p93, %f1138, %f759;
	@!%p93 bra 	$Lt_126_247554;
	.loc	22	452	0
	mov.f32 	%f1139, %f1138;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_40;
$Lt_126_247554:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p94, %f1136, %f759;
	@!%p94 bra 	$Lt_126_247810;
	.loc	22	456	0
	mov.f32 	%f1139, %f1136;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_40;
$Lt_126_247810:
	.loc	22	460	0
	mov.f32 	%f1139, %f759;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_40:
	.loc	22	482	0
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1140, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1140, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1141, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1142, %f161, %f174;
	sub.ftz.f32 	%f178, %f1141, %f1142;
	mul.ftz.f32 	%f1143, %f1139, %f38;
	fma.rn.ftz.f32 	%f1144, %f31, %f173, %f1143;
	mul.ftz.f32 	%f1145, %f175, %f1144;
	fma.rn.ftz.f32 	%f1131, %f35, %f178, %f1145;
	// ---- second channel: x = %f32, y = %f36 ----
	.loc	22	450	0
	mov.f32 	%f1146, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f36, %f1146;
	mov.f32 	%f1147, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f32, %f1147;
	mov.f32 	%f1148, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1148;
	mov.f32 	%f1149, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1149;
	add.ftz.f32 	%f1150, %f789, %f789;
	mov.f32 	%f1151, 0fbf800000;  	// -1
	add.ftz.f32 	%f1152, %f1150, %f1151;
	setp.gt.ftz.f32 	%p95, %f1152, %f787;
	@!%p95 bra 	$Lt_126_248066;
	.loc	22	452	0
	mov.f32 	%f1153, %f1152;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_38;
$Lt_126_248066:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p96, %f1150, %f787;
	@!%p96 bra 	$Lt_126_248322;
	.loc	22	456	0
	mov.f32 	%f1153, %f1150;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_38;
$Lt_126_248322:
	.loc	22	460	0
	mov.f32 	%f1153, %f787;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_38:
	.loc	22	482	0
	mul.ftz.f32 	%f1154, %f1153, %f38;
	fma.rn.ftz.f32 	%f1155, %f32, %f173, %f1154;
	mul.ftz.f32 	%f1156, %f175, %f1155;
	fma.rn.ftz.f32 	%f1130, %f36, %f178, %f1156;
	// ---- third channel: x = %f33, y = %f37 ----
	.loc	22	450	0
	mov.f32 	%f1157, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f37, %f1157;
	mov.f32 	%f1158, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f33, %f1158;
	mov.f32 	%f1159, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1159;
	mov.f32 	%f1160, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1160;
	add.ftz.f32 	%f1161, %f814, %f814;
	mov.f32 	%f1162, 0fbf800000;  	// -1
	add.ftz.f32 	%f1163, %f1161, %f1162;
	setp.gt.ftz.f32 	%p97, %f1163, %f812;
	@!%p97 bra 	$Lt_126_248578;
	.loc	22	452	0
	mov.f32 	%f1164, %f1163;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_36;
$Lt_126_248578:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p98, %f1161, %f812;
	@!%p98 bra 	$Lt_126_248834;
	.loc	22	456	0
	mov.f32 	%f1164, %f1161;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__303_36;
$Lt_126_248834:
	.loc	22	460	0
	mov.f32 	%f1164, %f812;
$LDWendi__Z5ClampIfET_S0_S0_S0__303_36:
	.loc	22	482	0
	mul.ftz.f32 	%f1165, %f1164, %f38;
	fma.rn.ftz.f32 	%f1166, %f33, %f173, %f1165;
	mul.ftz.f32 	%f1167, %f175, %f1166;
	fma.rn.ftz.f32 	%f1129, %f37, %f178, %f1167;
$Lt_126_297474:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	207	0
	mov.f32 	%f31, %f1131;
	mov.f32 	%f32, %f1130;
	mov.f32 	%f33, %f1129;
	mov.f32 	%f34, %f1125;
	bra.uni 	$Lt_126_317954;
$Lt_126_4866:
	// Branch arm tagged with source position file 22, line 483 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * B)
	//     with B = 0 when x < 1 - y, and B = 1 otherwise.
	//     x and y are compared unclamped in this arm.
	// NOTE(review): B has the shape of a "hard mix" blend -- confirm.
	.loc	22	483	0
	ld.param.f32 	%f1168, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1168, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1169, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1169;
	mov.f32 	%f1170, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1171, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1172, %f164, %f1171;
	mov.f32 	%f1173, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p99, %f1172, %f1173;
	@!%p99 bra 	$Lt_126_298242;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1174, 0f00000000;  	// 0
	mov.f32 	%f1175, 0f00000000;  	// 0
	mov.f32 	%f1176, 0f00000000;  	// 0
	mov.f32 	%f1170, 0f00000000;  	// 0
	bra.uni 	$Lt_126_297986;
$Lt_126_298242:
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1177, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1177, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1178, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1179, %f161, %f174;
	sub.ftz.f32 	%f178, %f1178, %f1179;
	// ---- first channel: B = (%f31 < 1 - %f35) ? 0 : 1 ----
	mov.f32 	%f1180, 0f00000000;  	// 0
	mov.f32 	%f1181, 0f3f800000;  	// 1
	mov.f32 	%f1182, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1183, %f1182, %f35;
	setp.lt.ftz.f32 	%p100, %f31, %f1183;
	selp.f32 	%f1184, %f1180, %f1181, %p100;
	mul.ftz.f32 	%f1185, %f1184, %f38;
	fma.rn.ftz.f32 	%f1186, %f31, %f173, %f1185;
	mul.ftz.f32 	%f1187, %f175, %f1186;
	fma.rn.ftz.f32 	%f1176, %f35, %f178, %f1187;
	// ---- second channel: B = (%f32 < 1 - %f36) ? 0 : 1 ----
	mov.f32 	%f1188, 0f00000000;  	// 0
	mov.f32 	%f1189, 0f3f800000;  	// 1
	mov.f32 	%f1190, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1191, %f1190, %f36;
	setp.lt.ftz.f32 	%p101, %f32, %f1191;
	selp.f32 	%f1192, %f1188, %f1189, %p101;
	mul.ftz.f32 	%f1193, %f1192, %f38;
	fma.rn.ftz.f32 	%f1194, %f32, %f173, %f1193;
	mul.ftz.f32 	%f1195, %f175, %f1194;
	fma.rn.ftz.f32 	%f1175, %f36, %f178, %f1195;
	// ---- third channel: B = (%f33 < 1 - %f37) ? 0 : 1 ----
	mov.f32 	%f1196, 0f00000000;  	// 0
	mov.f32 	%f1197, 0f3f800000;  	// 1
	mov.f32 	%f1198, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1199, %f1198, %f37;
	setp.lt.ftz.f32 	%p102, %f33, %f1199;
	selp.f32 	%f1200, %f1196, %f1197, %p102;
	mul.ftz.f32 	%f1201, %f1200, %f38;
	fma.rn.ftz.f32 	%f1202, %f33, %f173, %f1201;
	mul.ftz.f32 	%f1203, %f175, %f1202;
	fma.rn.ftz.f32 	%f1174, %f37, %f178, %f1203;
$Lt_126_297986:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	208	0
	mov.f32 	%f31, %f1176;
	mov.f32 	%f32, %f1175;
	mov.f32 	%f33, %f1174;
	mov.f32 	%f34, %f1170;
	bra.uni 	$Lt_126_317954;
$Lt_126_5122:
	// Branch arm tagged with source position file 22, line 484 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * |x - y|)
	//     Neither the inputs nor |x - y| are clamped in this arm.
	// NOTE(review): |x - y| has the shape of a "difference" blend -- confirm.
	.loc	22	484	0
	ld.param.f32 	%f1204, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1204, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1205, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1205;
	mov.f32 	%f1206, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1207, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1208, %f164, %f1207;
	mov.f32 	%f1209, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p103, %f1208, %f1209;
	@!%p103 bra 	$Lt_126_298754;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1210, 0f00000000;  	// 0
	mov.f32 	%f1211, 0f00000000;  	// 0
	mov.f32 	%f1212, 0f00000000;  	// 0
	mov.f32 	%f1206, 0f00000000;  	// 0
	bra.uni 	$Lt_126_298498;
$Lt_126_298754:
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1213, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1213, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1214, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1215, %f161, %f174;
	sub.ftz.f32 	%f178, %f1214, %f1215;
	// ---- first channel: |%f31 - %f35| ----
	sub.ftz.f32 	%f1216, %f31, %f35;
	abs.ftz.f32 	%f1217, %f1216;
	mul.ftz.f32 	%f1218, %f38, %f1217;
	fma.rn.ftz.f32 	%f1219, %f31, %f173, %f1218;
	mul.ftz.f32 	%f1220, %f175, %f1219;
	fma.rn.ftz.f32 	%f1212, %f35, %f178, %f1220;
	// ---- second channel: |%f32 - %f36| ----
	sub.ftz.f32 	%f1221, %f32, %f36;
	abs.ftz.f32 	%f1222, %f1221;
	mul.ftz.f32 	%f1223, %f38, %f1222;
	fma.rn.ftz.f32 	%f1224, %f32, %f173, %f1223;
	mul.ftz.f32 	%f1225, %f175, %f1224;
	fma.rn.ftz.f32 	%f1211, %f36, %f178, %f1225;
	// ---- third channel: |%f33 - %f37| ----
	sub.ftz.f32 	%f1226, %f33, %f37;
	abs.ftz.f32 	%f1227, %f1226;
	mul.ftz.f32 	%f1228, %f38, %f1227;
	fma.rn.ftz.f32 	%f1229, %f33, %f173, %f1228;
	mul.ftz.f32 	%f1230, %f175, %f1229;
	fma.rn.ftz.f32 	%f1210, %f37, %f178, %f1230;
$Lt_126_298498:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	209	0
	mov.f32 	%f31, %f1212;
	mov.f32 	%f32, %f1211;
	mov.f32 	%f33, %f1210;
	mov.f32 	%f34, %f1206;
	bra.uni 	$Lt_126_317954;
$Lt_126_5378:
	// Branch arm tagged with source position file 22, line 485 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * clamp(B, 0, 1))
	//     with cx = clamp(x, 0, 1), cy = clamp(y, 0, 1) and
	//         B = cx + cy - 2 * cx * cy
	// NOTE(review): B has the shape of an "exclusion" blend -- confirm.
	.loc	22	485	0
	ld.param.f32 	%f1231, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1231, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1232, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1232;
	mov.f32 	%f1233, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1234, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1235, %f164, %f1234;
	mov.f32 	%f1236, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p104, %f1235, %f1236;
	@!%p104 bra 	$Lt_126_299266;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1237, 0f00000000;  	// 0
	mov.f32 	%f1238, 0f00000000;  	// 0
	mov.f32 	%f1239, 0f00000000;  	// 0
	mov.f32 	%f1233, 0f00000000;  	// 0
	bra.uni 	$Lt_126_299010;
$Lt_126_299266:
	// Shared factors (%f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A) are
	// interleaved with the first channel's clamps of %f35 and %f31.
	mov.f32 	%f1240, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1240, %f38;
	mov.f32 	%f1241, 0f00000000;  	// 0
	max.ftz.f32 	%f755, %f35, %f1241;
	mov.f32 	%f1242, 0f00000000;  	// 0
	max.ftz.f32 	%f757, %f31, %f1242;
	mov.f32 	%f1243, 0f3f800000;  	// 1
	min.ftz.f32 	%f759, %f755, %f1243;
	mov.f32 	%f1244, 0f3f800000;  	// 1
	min.ftz.f32 	%f761, %f757, %f1244;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1245, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1246, %f161, %f174;
	sub.ftz.f32 	%f178, %f1245, %f1246;
	// ---- first channel: B = cx + cy - cy * (2 * cx) ----
	add.ftz.f32 	%f1247, %f761, %f759;
	add.ftz.f32 	%f1248, %f761, %f761;
	mul.ftz.f32 	%f1249, %f759, %f1248;
	sub.ftz.f32 	%f1250, %f1247, %f1249;
	mov.f32 	%f1251, 0f00000000;  	// 0
	max.ftz.f32 	%f1252, %f1250, %f1251;
	mov.f32 	%f1253, 0f3f800000;  	// 1
	min.ftz.f32 	%f1254, %f1252, %f1253;
	mul.ftz.f32 	%f1255, %f38, %f1254;
	fma.rn.ftz.f32 	%f1256, %f31, %f173, %f1255;
	mul.ftz.f32 	%f1257, %f175, %f1256;
	fma.rn.ftz.f32 	%f1239, %f35, %f178, %f1257;
	// ---- second channel: x = %f32, y = %f36 ----
	mov.f32 	%f1258, 0f00000000;  	// 0
	max.ftz.f32 	%f783, %f36, %f1258;
	mov.f32 	%f1259, 0f00000000;  	// 0
	max.ftz.f32 	%f785, %f32, %f1259;
	mov.f32 	%f1260, 0f3f800000;  	// 1
	min.ftz.f32 	%f787, %f783, %f1260;
	mov.f32 	%f1261, 0f3f800000;  	// 1
	min.ftz.f32 	%f789, %f785, %f1261;
	add.ftz.f32 	%f1262, %f789, %f787;
	add.ftz.f32 	%f1263, %f789, %f789;
	mul.ftz.f32 	%f1264, %f787, %f1263;
	sub.ftz.f32 	%f1265, %f1262, %f1264;
	mov.f32 	%f1266, 0f00000000;  	// 0
	max.ftz.f32 	%f1267, %f1265, %f1266;
	mov.f32 	%f1268, 0f3f800000;  	// 1
	min.ftz.f32 	%f1269, %f1267, %f1268;
	mul.ftz.f32 	%f1270, %f38, %f1269;
	fma.rn.ftz.f32 	%f1271, %f32, %f173, %f1270;
	mul.ftz.f32 	%f1272, %f175, %f1271;
	fma.rn.ftz.f32 	%f1238, %f36, %f178, %f1272;
	// ---- third channel: x = %f33, y = %f37 ----
	mov.f32 	%f1273, 0f00000000;  	// 0
	max.ftz.f32 	%f808, %f37, %f1273;
	mov.f32 	%f1274, 0f00000000;  	// 0
	max.ftz.f32 	%f810, %f33, %f1274;
	mov.f32 	%f1275, 0f3f800000;  	// 1
	min.ftz.f32 	%f812, %f808, %f1275;
	mov.f32 	%f1276, 0f3f800000;  	// 1
	min.ftz.f32 	%f814, %f810, %f1276;
	add.ftz.f32 	%f1277, %f814, %f812;
	add.ftz.f32 	%f1278, %f814, %f814;
	mul.ftz.f32 	%f1279, %f812, %f1278;
	sub.ftz.f32 	%f1280, %f1277, %f1279;
	mov.f32 	%f1281, 0f00000000;  	// 0
	max.ftz.f32 	%f1282, %f1280, %f1281;
	mov.f32 	%f1283, 0f3f800000;  	// 1
	min.ftz.f32 	%f1284, %f1282, %f1283;
	mul.ftz.f32 	%f1285, %f38, %f1284;
	fma.rn.ftz.f32 	%f1286, %f33, %f173, %f1285;
	mul.ftz.f32 	%f1287, %f175, %f1286;
	fma.rn.ftz.f32 	%f1237, %f37, %f178, %f1287;
$Lt_126_299010:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	210	0
	mov.f32 	%f31, %f1239;
	mov.f32 	%f32, %f1238;
	mov.f32 	%f33, %f1237;
	mov.f32 	%f34, %f1233;
	bra.uni 	$Lt_126_317954;
$Lt_126_5634:
	// Branch arm tagged with source position file 22, line 486 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * clamp(B, 0, 1))
	//     with B = clamp(y, 0, 1) - clamp(x, 0, 1).
	// NOTE(review): B has the shape of a "subtract" blend -- confirm.
	.loc	22	486	0
	ld.param.f32 	%f1288, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1288, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1289, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1289;
	mov.f32 	%f1290, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1291, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1292, %f164, %f1291;
	mov.f32 	%f1293, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p105, %f1292, %f1293;
	@!%p105 bra 	$Lt_126_299778;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1294, 0f00000000;  	// 0
	mov.f32 	%f1295, 0f00000000;  	// 0
	mov.f32 	%f1296, 0f00000000;  	// 0
	mov.f32 	%f1290, 0f00000000;  	// 0
	bra.uni 	$Lt_126_299522;
$Lt_126_299778:
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1297, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1297, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1298, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1299, %f161, %f174;
	sub.ftz.f32 	%f178, %f1298, %f1299;
	// ---- first channel: B = clamp(%f35) - clamp(%f31) ----
	mov.f32 	%f1300, 0f00000000;  	// 0
	max.ftz.f32 	%f1301, %f35, %f1300;
	mov.f32 	%f1302, 0f3f800000;  	// 1
	min.ftz.f32 	%f1303, %f1301, %f1302;
	mov.f32 	%f1304, 0f00000000;  	// 0
	max.ftz.f32 	%f1305, %f31, %f1304;
	mov.f32 	%f1306, 0f3f800000;  	// 1
	min.ftz.f32 	%f1307, %f1305, %f1306;
	sub.ftz.f32 	%f1308, %f1303, %f1307;
	mov.f32 	%f1309, 0f00000000;  	// 0
	max.ftz.f32 	%f1310, %f1308, %f1309;
	mov.f32 	%f1311, 0f3f800000;  	// 1
	min.ftz.f32 	%f1312, %f1310, %f1311;
	mul.ftz.f32 	%f1313, %f38, %f1312;
	fma.rn.ftz.f32 	%f1314, %f31, %f173, %f1313;
	mul.ftz.f32 	%f1315, %f175, %f1314;
	fma.rn.ftz.f32 	%f1296, %f35, %f178, %f1315;
	// ---- second channel: B = clamp(%f36) - clamp(%f32) ----
	mov.f32 	%f1316, 0f00000000;  	// 0
	max.ftz.f32 	%f1317, %f36, %f1316;
	mov.f32 	%f1318, 0f3f800000;  	// 1
	min.ftz.f32 	%f1319, %f1317, %f1318;
	mov.f32 	%f1320, 0f00000000;  	// 0
	max.ftz.f32 	%f1321, %f32, %f1320;
	mov.f32 	%f1322, 0f3f800000;  	// 1
	min.ftz.f32 	%f1323, %f1321, %f1322;
	sub.ftz.f32 	%f1324, %f1319, %f1323;
	mov.f32 	%f1325, 0f00000000;  	// 0
	max.ftz.f32 	%f1326, %f1324, %f1325;
	mov.f32 	%f1327, 0f3f800000;  	// 1
	min.ftz.f32 	%f1328, %f1326, %f1327;
	mul.ftz.f32 	%f1329, %f38, %f1328;
	fma.rn.ftz.f32 	%f1330, %f32, %f173, %f1329;
	mul.ftz.f32 	%f1331, %f175, %f1330;
	fma.rn.ftz.f32 	%f1295, %f36, %f178, %f1331;
	// ---- third channel: B = clamp(%f37) - clamp(%f33) ----
	mov.f32 	%f1332, 0f00000000;  	// 0
	max.ftz.f32 	%f1333, %f37, %f1332;
	mov.f32 	%f1334, 0f3f800000;  	// 1
	min.ftz.f32 	%f1335, %f1333, %f1334;
	mov.f32 	%f1336, 0f00000000;  	// 0
	max.ftz.f32 	%f1337, %f33, %f1336;
	mov.f32 	%f1338, 0f3f800000;  	// 1
	min.ftz.f32 	%f1339, %f1337, %f1338;
	sub.ftz.f32 	%f1340, %f1335, %f1339;
	mov.f32 	%f1341, 0f00000000;  	// 0
	max.ftz.f32 	%f1342, %f1340, %f1341;
	mov.f32 	%f1343, 0f3f800000;  	// 1
	min.ftz.f32 	%f1344, %f1342, %f1343;
	mul.ftz.f32 	%f1345, %f38, %f1344;
	fma.rn.ftz.f32 	%f1346, %f33, %f173, %f1345;
	mul.ftz.f32 	%f1347, %f175, %f1346;
	fma.rn.ftz.f32 	%f1294, %f37, %f178, %f1347;
$Lt_126_299522:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	211	0
	mov.f32 	%f31, %f1296;
	mov.f32 	%f32, %f1295;
	mov.f32 	%f33, %f1294;
	mov.f32 	%f34, %f1290;
	bra.uni 	$Lt_126_317954;
$Lt_126_5890:
	// Branch arm tagged with source position file 22, line 487 (see .loc).
	// Inputs : (%f31,%f32,%f33,%f34), (%f35,%f36,%f37,%f38) and the alphaGain
	//          parameter.  NOTE(review): presumably two RGBA values with the
	//          alpha in %f34 / %f38 -- confirm against the CUDA source.
	// Outputs: (%f31,%f32,%f33,%f34) are overwritten, then control jumps to
	//          $Lt_126_317954.
	// Let a = alphaGain * %f34 and A = a + %f38 - a * %f38.
	//   * If A - 8e-6 <= 0, all four outputs are set to 0.
	//   * Otherwise %f34 = A and, for each pair (x, y) in
	//     (%f31,%f35), (%f32,%f36), (%f33,%f37):
	//         x' = y * (1 - a/A) + (a/A) * (x * (1 - %f38) + %f38 * clamp(B, 0, 1))
	//     with B = clamp(y, 0, 1) / min(max(x, 1e-7), 1)   (div.approx).
	//     The 1e-7 lower bound keeps the divisor away from zero.
	// NOTE(review): B has the shape of a "divide" blend -- confirm.
	.loc	22	487	0
	ld.param.f32 	%f1348, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1348, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1349, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1349;
	mov.f32 	%f1350, %f164;
	// Guard: the reciprocal of %f164 below is only reached when %f164 - 8e-6 > 0.
	mov.f32 	%f1351, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1352, %f164, %f1351;
	mov.f32 	%f1353, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p106, %f1352, %f1353;
	@!%p106 bra 	$Lt_126_300290;
	// Guard taken: zero all four results and skip the per-channel math.
	mov.f32 	%f1354, 0f00000000;  	// 0
	mov.f32 	%f1355, 0f00000000;  	// 0
	mov.f32 	%f1356, 0f00000000;  	// 0
	mov.f32 	%f1350, 0f00000000;  	// 0
	bra.uni 	$Lt_126_300034;
$Lt_126_300290:
	// Shared factors reused by all three channels:
	//   %f173 = 1 - %f38, %f175 = a/A, %f178 = 1 - a/A.
	mov.f32 	%f1357, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1357, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1358, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1359, %f161, %f174;
	sub.ftz.f32 	%f178, %f1358, %f1359;
	// ---- first channel: B = clamp(%f35, 0, 1) / clamp(%f31, 1e-7, 1) ----
	mov.f32 	%f1360, 0f00000000;  	// 0
	max.ftz.f32 	%f1361, %f35, %f1360;
	mov.f32 	%f1362, 0f3f800000;  	// 1
	min.ftz.f32 	%f1363, %f1361, %f1362;
	mov.f32 	%f1364, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1365, %f31, %f1364;
	mov.f32 	%f1366, 0f3f800000;  	// 1
	min.ftz.f32 	%f1367, %f1365, %f1366;
	div.approx.ftz.f32 	%f1368, %f1363, %f1367;
	mov.f32 	%f1369, 0f00000000;  	// 0
	max.ftz.f32 	%f1370, %f1368, %f1369;
	mov.f32 	%f1371, 0f3f800000;  	// 1
	min.ftz.f32 	%f1372, %f1370, %f1371;
	mul.ftz.f32 	%f1373, %f38, %f1372;
	fma.rn.ftz.f32 	%f1374, %f31, %f173, %f1373;
	mul.ftz.f32 	%f1375, %f175, %f1374;
	fma.rn.ftz.f32 	%f1356, %f35, %f178, %f1375;
	// ---- second channel: B = clamp(%f36, 0, 1) / clamp(%f32, 1e-7, 1) ----
	mov.f32 	%f1376, 0f00000000;  	// 0
	max.ftz.f32 	%f1377, %f36, %f1376;
	mov.f32 	%f1378, 0f3f800000;  	// 1
	min.ftz.f32 	%f1379, %f1377, %f1378;
	mov.f32 	%f1380, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1381, %f32, %f1380;
	mov.f32 	%f1382, 0f3f800000;  	// 1
	min.ftz.f32 	%f1383, %f1381, %f1382;
	div.approx.ftz.f32 	%f1384, %f1379, %f1383;
	mov.f32 	%f1385, 0f00000000;  	// 0
	max.ftz.f32 	%f1386, %f1384, %f1385;
	mov.f32 	%f1387, 0f3f800000;  	// 1
	min.ftz.f32 	%f1388, %f1386, %f1387;
	mul.ftz.f32 	%f1389, %f38, %f1388;
	fma.rn.ftz.f32 	%f1390, %f32, %f173, %f1389;
	mul.ftz.f32 	%f1391, %f175, %f1390;
	fma.rn.ftz.f32 	%f1355, %f36, %f178, %f1391;
	// ---- third channel: B = clamp(%f37, 0, 1) / clamp(%f33, 1e-7, 1) ----
	mov.f32 	%f1392, 0f00000000;  	// 0
	max.ftz.f32 	%f1393, %f37, %f1392;
	mov.f32 	%f1394, 0f3f800000;  	// 1
	min.ftz.f32 	%f1395, %f1393, %f1394;
	mov.f32 	%f1396, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1397, %f33, %f1396;
	mov.f32 	%f1398, 0f3f800000;  	// 1
	min.ftz.f32 	%f1399, %f1397, %f1398;
	div.approx.ftz.f32 	%f1400, %f1395, %f1399;
	mov.f32 	%f1401, 0f00000000;  	// 0
	max.ftz.f32 	%f1402, %f1400, %f1401;
	mov.f32 	%f1403, 0f3f800000;  	// 1
	min.ftz.f32 	%f1404, %f1402, %f1403;
	mul.ftz.f32 	%f1405, %f38, %f1404;
	fma.rn.ftz.f32 	%f1406, %f33, %f173, %f1405;
	mul.ftz.f32 	%f1407, %f175, %f1406;
	fma.rn.ftz.f32 	%f1354, %f37, %f178, %f1407;
$Lt_126_300034:
	// Join point of both paths: publish the four results into %f31..%f34.
	.loc	6	212	0
	mov.f32 	%f31, %f1356;
	mov.f32 	%f32, %f1355;
	mov.f32 	%f33, %f1354;
	mov.f32 	%f34, %f1350;
	bra.uni 	$Lt_126_317954;
$Lt_126_6146:
	.loc	22	154	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	setp.eq.ftz.f32 	%p109, %f1411, %f33;
	@!%p109 bra 	$Lt_126_300802;
	setp.eq.ftz.f32 	%p110, %f1410, %f32;
	@!%p110 bra 	$Lt_126_301314;
	setp.gt.ftz.f32 	%p111, %f32, %f33;
	@!%p111 bra 	$Lt_126_301826;
	.loc	22	161	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1418, %f31, %f33;
	mul.ftz.f32 	%f1419, %f1417, %f1418;
	sub.ftz.f32 	%f1420, %f32, %f33;
	div.approx.ftz.f32 	%f1421, %f1419, %f1420;
	.loc	22	162	0
	mov.f32 	%f1422, %f1417;
	bra.uni 	$Lt_126_302082;
$Lt_126_301826:
	.loc	22	166	0
	mov.f32 	%f1421, 0f00000000;  	// 0
	mov.f32 	%f1422, 0f00000000;  	// 0
	bra.uni 	$Lt_126_302082;
$Lt_126_301314:
	setp.gt.ftz.f32 	%p114, %f31, %f33;
	@!%p114 bra 	$Lt_126_302338;
	.loc	22	173	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1423, %f32, %f33;
	mul.ftz.f32 	%f1424, %f1417, %f1423;
	sub.ftz.f32 	%f1425, %f31, %f33;
	div.approx.ftz.f32 	%f1422, %f1424, %f1425;
	.loc	22	174	0
	mov.f32 	%f1421, %f1417;
	bra.uni 	$Lt_126_302082;
$Lt_126_302338:
	.loc	22	178	0
	mov.f32 	%f1421, 0f00000000;  	// 0
	mov.f32 	%f1422, 0f00000000;  	// 0
$Lt_126_302082:
$Lt_126_301058:
	mov.f32 	%f1426, 0f00000000;  	// 0
	bra.uni 	$Lt_126_304642;
$Lt_126_300802:
	setp.eq.ftz.f32 	%p115, %f1411, %f32;
	setp.eq.ftz.f32 	%p116, %f1410, %f33;
	@!%p116 bra 	$Lt_126_302850;
	@!%p115 bra 	$Lt_126_303362;
	setp.lt.ftz.f32 	%p117, %f32, %f33;
	@!%p117 bra 	$Lt_126_303874;
	.loc	22	191	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1427, %f31, %f32;
	mul.ftz.f32 	%f1428, %f1417, %f1427;
	sub.ftz.f32 	%f1429, %f33, %f32;
	div.approx.ftz.f32 	%f1421, %f1428, %f1429;
	.loc	22	192	0
	mov.f32 	%f1426, %f1417;
	bra.uni 	$Lt_126_303618;
$Lt_126_303874:
	.loc	22	196	0
	mov.f32 	%f1421, 0f00000000;  	// 0
	mov.f32 	%f1426, 0f00000000;  	// 0
$Lt_126_303618:
	mov.f32 	%f1422, 0f00000000;  	// 0
	bra.uni 	$Lt_126_304642;
$Lt_126_303362:
	setp.lt.ftz.f32 	%p118, %f31, %f33;
	@!%p118 bra 	$Lt_126_304386;
	.loc	22	204	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1430, %f32, %f31;
	mul.ftz.f32 	%f1431, %f1417, %f1430;
	sub.ftz.f32 	%f1432, %f33, %f31;
	div.approx.ftz.f32 	%f1422, %f1431, %f1432;
	.loc	22	205	0
	mov.f32 	%f1426, %f1417;
	bra.uni 	$Lt_126_304130;
$Lt_126_304386:
	.loc	22	209	0
	mov.f32 	%f1426, 0f00000000;  	// 0
	mov.f32 	%f1422, 0f00000000;  	// 0
$Lt_126_304130:
	.loc	22	211	0
	mov.f32 	%f1421, 0f00000000;  	// 0
	bra.uni 	$Lt_126_304642;
$Lt_126_302850:
	@!%p115 bra 	$Lt_126_304898;
	setp.gt.ftz.f32 	%p119, %f31, %f32;
	@!%p119 bra 	$Lt_126_305410;
	.loc	22	220	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1433, %f33, %f32;
	mul.ftz.f32 	%f1434, %f1417, %f1433;
	sub.ftz.f32 	%f1435, %f31, %f32;
	div.approx.ftz.f32 	%f1426, %f1434, %f1435;
	.loc	22	221	0
	mov.f32 	%f1421, %f1417;
	bra.uni 	$Lt_126_305154;
$Lt_126_305410:
	.loc	22	225	0
	mov.f32 	%f1421, 0f00000000;  	// 0
	mov.f32 	%f1426, 0f00000000;  	// 0
$Lt_126_305154:
	mov.f32 	%f1422, 0f00000000;  	// 0
	bra.uni 	$Lt_126_304642;
$Lt_126_304898:
	@!%p107 bra 	$Lt_126_305922;
	.loc	22	233	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	sub.ftz.f32 	%f1416, %f1413, %f1415;
	cvt.ftz.sat.f32.f32 	%f1417, %f1416;
	sub.ftz.f32 	%f1436, %f33, %f31;
	mul.ftz.f32 	%f1437, %f1417, %f1436;
	sub.ftz.f32 	%f1438, %f32, %f31;
	div.approx.ftz.f32 	%f1426, %f1437, %f1438;
	.loc	22	234	0
	mov.f32 	%f1422, %f1417;
	bra.uni 	$Lt_126_305666;
$Lt_126_305922:
	.loc	22	238	0
	mov.f32 	%f1426, 0f00000000;  	// 0
	mov.f32 	%f1422, 0f00000000;  	// 0
$Lt_126_305666:
	.loc	22	240	0
	mov.f32 	%f1421, 0f00000000;  	// 0
$Lt_126_304642:
$Lt_126_302594:
$Lt_126_300546:
	.loc	22	113	0
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1439, %f36, %f497;
	mul.ftz.f32 	%f1440, %f1422, %f497;
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1441, %f496, %f37, %f1439;
	fma.rn.ftz.f32 	%f1442, %f496, %f1426, %f1440;
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1443, %f495, %f35, %f1441;
	fma.rn.ftz.f32 	%f1444, %f495, %f1421, %f1442;
	cvt.ftz.sat.f32.f32 	%f1445, %f1443;
	cvt.ftz.sat.f32.f32 	%f1446, %f1444;
	sub.ftz.f32 	%f1447, %f1445, %f1446;
	add.ftz.f32 	%f1448, %f1447, %f1421;
	mov.f32 	%f1449, %f1448;
	add.ftz.f32 	%f1450, %f1447, %f1422;
	mov.f32 	%f1451, %f1450;
	add.ftz.f32 	%f1452, %f1447, %f1426;
	mov.f32 	%f1453, %f1452;
	.loc	22	50	0
	mul.ftz.f32 	%f1454, %f1450, %f497;
	fma.rn.ftz.f32 	%f1455, %f496, %f1452, %f1454;
	fma.rn.ftz.f32 	%f1456, %f495, %f1448, %f1455;
	cvt.ftz.sat.f32.f32 	%f1457, %f1456;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p120, %f1450, %f1448;
	selp.f32 	%f1458, %f1448, %f1450, %p120;
	setp.lt.ftz.f32 	%p121, %f1458, %f1452;
	selp.f32 	%f1459, %f1458, %f1452, %p121;
	mov.f32 	%f1460, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p122, %f1459, %f1460;
	@!%p122 bra 	$Lt_126_306178;
	.loc	22	119	0
	sub.ftz.f32 	%f1461, %f1457, %f1459;
	sub.ftz.f32 	%f1462, %f1452, %f1457;
	mul.ftz.f32 	%f1463, %f1457, %f1462;
	div.approx.ftz.f32 	%f1464, %f1463, %f1461;
	add.ftz.f32 	%f1453, %f1457, %f1464;
	.loc	22	120	0
	sub.ftz.f32 	%f1465, %f1450, %f1457;
	mul.ftz.f32 	%f1466, %f1457, %f1465;
	div.approx.ftz.f32 	%f1467, %f1466, %f1461;
	add.ftz.f32 	%f1451, %f1457, %f1467;
	.loc	22	121	0
	sub.ftz.f32 	%f1468, %f1448, %f1457;
	mul.ftz.f32 	%f1469, %f1457, %f1468;
	div.approx.ftz.f32 	%f1470, %f1469, %f1461;
	add.ftz.f32 	%f1449, %f1457, %f1470;
$Lt_126_306178:
	max.ftz.f32 	%f1471, %f1450, %f1448;
	max.ftz.f32 	%f1472, %f1471, %f1452;
	mov.f32 	%f1473, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p123, %f1472, %f1473;
	@!%p123 bra 	$Lt_126_306690;
	.loc	27	529	0
	mov.f32 	%f1474, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1475, %f1474, %f1457;
	sub.ftz.f32 	%f1476, %f1472, %f1457;
	sub.ftz.f32 	%f1477, %f1453, %f1457;
	mul.ftz.f32 	%f1478, %f1475, %f1477;
	div.approx.ftz.f32 	%f1479, %f1478, %f1476;
	.loc	22	125	0
	add.ftz.f32 	%f1453, %f1479, %f1457;
	.loc	27	529	0
	sub.ftz.f32 	%f1480, %f1451, %f1457;
	mul.ftz.f32 	%f1481, %f1475, %f1480;
	div.approx.ftz.f32 	%f1482, %f1481, %f1476;
	.loc	22	126	0
	add.ftz.f32 	%f1451, %f1482, %f1457;
	.loc	27	529	0
	sub.ftz.f32 	%f1483, %f1449, %f1457;
	mul.ftz.f32 	%f1484, %f1475, %f1483;
	div.approx.ftz.f32 	%f1485, %f1484, %f1476;
	.loc	22	127	0
	add.ftz.f32 	%f1449, %f1485, %f1457;
$Lt_126_306690:
	.loc	22	468	0
	ld.param.f32 	%f1486, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1486, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1487, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1487;
	mov.f32 	%f1488, %f164;
	mov.f32 	%f1489, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1490, %f164, %f1489;
	mov.f32 	%f1491, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p124, %f1490, %f1491;
	@!%p124 bra 	$Lt_126_307458;
	mov.f32 	%f1492, 0f00000000;  	// 0
	mov.f32 	%f1493, 0f00000000;  	// 0
	mov.f32 	%f1494, 0f00000000;  	// 0
	mov.f32 	%f1488, 0f00000000;  	// 0
	bra.uni 	$Lt_126_307202;
$Lt_126_307458:
	mov.f32 	%f1495, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1495, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1496, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1497, %f161, %f174;
	sub.ftz.f32 	%f178, %f1496, %f1497;
	mul.ftz.f32 	%f1498, %f173, %f1449;
	fma.rn.ftz.f32 	%f1499, %f1449, %f38, %f1498;
	mul.ftz.f32 	%f1500, %f175, %f1499;
	fma.rn.ftz.f32 	%f1494, %f35, %f178, %f1500;
	mul.ftz.f32 	%f1501, %f173, %f1451;
	fma.rn.ftz.f32 	%f1502, %f1451, %f38, %f1501;
	mul.ftz.f32 	%f1503, %f175, %f1502;
	fma.rn.ftz.f32 	%f1493, %f36, %f178, %f1503;
	mul.ftz.f32 	%f1504, %f173, %f1453;
	fma.rn.ftz.f32 	%f1505, %f1453, %f38, %f1504;
	mul.ftz.f32 	%f1506, %f175, %f1505;
	fma.rn.ftz.f32 	%f1492, %f37, %f178, %f1506;
$Lt_126_307202:
	.loc	6	213	0
	mov.f32 	%f31, %f1494;
	mov.f32 	%f32, %f1493;
	mov.f32 	%f33, %f1492;
	mov.f32 	%f34, %f1488;
	bra.uni 	$Lt_126_317954;
$Lt_126_6402:
	.loc	22	154	0
	max.ftz.f32 	%f1412, %f35, %f36;
	setp.lt.ftz.f32 	%p112, %f35, %f36;
	max.ftz.f32 	%f1413, %f1412, %f37;
	selp.f32 	%f1414, %f35, %f36, %p112;
	setp.lt.ftz.f32 	%p113, %f1414, %f37;
	selp.f32 	%f1415, %f1414, %f37, %p113;
	setp.eq.ftz.f32 	%p125, %f1415, %f37;
	@!%p125 bra 	$Lt_126_307970;
	setp.eq.ftz.f32 	%p126, %f1413, %f36;
	@!%p126 bra 	$Lt_126_308482;
	setp.gt.ftz.f32 	%p127, %f36, %f37;
	@!%p127 bra 	$Lt_126_308994;
	.loc	22	161	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1509, %f35, %f37;
	mul.ftz.f32 	%f1510, %f1508, %f1509;
	sub.ftz.f32 	%f1511, %f36, %f37;
	div.approx.ftz.f32 	%f1512, %f1510, %f1511;
	.loc	22	162	0
	mov.f32 	%f1513, %f1508;
	bra.uni 	$Lt_126_309250;
$Lt_126_308994:
	.loc	22	166	0
	mov.f32 	%f1512, 0f00000000;  	// 0
	mov.f32 	%f1513, 0f00000000;  	// 0
	bra.uni 	$Lt_126_309250;
$Lt_126_308482:
	setp.gt.ftz.f32 	%p128, %f35, %f37;
	@!%p128 bra 	$Lt_126_309506;
	.loc	22	173	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1514, %f36, %f37;
	mul.ftz.f32 	%f1515, %f1508, %f1514;
	sub.ftz.f32 	%f1516, %f35, %f37;
	div.approx.ftz.f32 	%f1513, %f1515, %f1516;
	.loc	22	174	0
	mov.f32 	%f1512, %f1508;
	bra.uni 	$Lt_126_309250;
$Lt_126_309506:
	.loc	22	178	0
	mov.f32 	%f1512, 0f00000000;  	// 0
	mov.f32 	%f1513, 0f00000000;  	// 0
$Lt_126_309250:
$Lt_126_308226:
	mov.f32 	%f1517, 0f00000000;  	// 0
	bra.uni 	$Lt_126_311810;
$Lt_126_307970:
	setp.eq.ftz.f32 	%p129, %f1415, %f36;
	setp.eq.ftz.f32 	%p130, %f1413, %f37;
	@!%p130 bra 	$Lt_126_310018;
	@!%p129 bra 	$Lt_126_310530;
	setp.lt.ftz.f32 	%p131, %f36, %f37;
	@!%p131 bra 	$Lt_126_311042;
	.loc	22	191	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1518, %f35, %f36;
	mul.ftz.f32 	%f1519, %f1508, %f1518;
	sub.ftz.f32 	%f1520, %f37, %f36;
	div.approx.ftz.f32 	%f1512, %f1519, %f1520;
	.loc	22	192	0
	mov.f32 	%f1517, %f1508;
	bra.uni 	$Lt_126_310786;
$Lt_126_311042:
	.loc	22	196	0
	mov.f32 	%f1512, 0f00000000;  	// 0
	mov.f32 	%f1517, 0f00000000;  	// 0
$Lt_126_310786:
	mov.f32 	%f1513, 0f00000000;  	// 0
	bra.uni 	$Lt_126_311810;
$Lt_126_310530:
	setp.lt.ftz.f32 	%p132, %f35, %f37;
	@!%p132 bra 	$Lt_126_311554;
	.loc	22	204	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1521, %f36, %f35;
	mul.ftz.f32 	%f1522, %f1508, %f1521;
	sub.ftz.f32 	%f1523, %f37, %f35;
	div.approx.ftz.f32 	%f1513, %f1522, %f1523;
	.loc	22	205	0
	mov.f32 	%f1517, %f1508;
	bra.uni 	$Lt_126_311298;
$Lt_126_311554:
	.loc	22	209	0
	mov.f32 	%f1517, 0f00000000;  	// 0
	mov.f32 	%f1513, 0f00000000;  	// 0
$Lt_126_311298:
	.loc	22	211	0
	mov.f32 	%f1512, 0f00000000;  	// 0
	bra.uni 	$Lt_126_311810;
$Lt_126_310018:
	@!%p129 bra 	$Lt_126_312066;
	setp.gt.ftz.f32 	%p133, %f35, %f36;
	@!%p133 bra 	$Lt_126_312578;
	.loc	22	220	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1524, %f37, %f36;
	mul.ftz.f32 	%f1525, %f1508, %f1524;
	sub.ftz.f32 	%f1526, %f35, %f36;
	div.approx.ftz.f32 	%f1517, %f1525, %f1526;
	.loc	22	221	0
	mov.f32 	%f1512, %f1508;
	bra.uni 	$Lt_126_312322;
$Lt_126_312578:
	.loc	22	225	0
	mov.f32 	%f1512, 0f00000000;  	// 0
	mov.f32 	%f1517, 0f00000000;  	// 0
$Lt_126_312322:
	mov.f32 	%f1513, 0f00000000;  	// 0
	bra.uni 	$Lt_126_311810;
$Lt_126_312066:
	@!%p112 bra 	$Lt_126_313090;
	.loc	22	233	0
	setp.lt.ftz.f32 	%p107, %f31, %f32;
	max.ftz.f32 	%f1408, %f31, %f32;
	selp.f32 	%f1409, %f31, %f32, %p107;
	max.ftz.f32 	%f1410, %f1408, %f33;
	setp.lt.ftz.f32 	%p108, %f1409, %f33;
	selp.f32 	%f1411, %f1409, %f33, %p108;
	sub.ftz.f32 	%f1507, %f1410, %f1411;
	cvt.ftz.sat.f32.f32 	%f1508, %f1507;
	sub.ftz.f32 	%f1527, %f37, %f35;
	mul.ftz.f32 	%f1528, %f1508, %f1527;
	sub.ftz.f32 	%f1529, %f36, %f35;
	div.approx.ftz.f32 	%f1517, %f1528, %f1529;
	.loc	22	234	0
	mov.f32 	%f1513, %f1508;
	bra.uni 	$Lt_126_312834;
$Lt_126_313090:
	.loc	22	238	0
	mov.f32 	%f1517, 0f00000000;  	// 0
	mov.f32 	%f1513, 0f00000000;  	// 0
$Lt_126_312834:
	.loc	22	240	0
	mov.f32 	%f1512, 0f00000000;  	// 0
$Lt_126_311810:
$Lt_126_309762:
$Lt_126_307714:
	.loc	22	113	0
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1530, %f36, %f497;
	mul.ftz.f32 	%f1531, %f1513, %f497;
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1532, %f496, %f37, %f1530;
	fma.rn.ftz.f32 	%f1533, %f496, %f1517, %f1531;
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1534, %f495, %f35, %f1532;
	fma.rn.ftz.f32 	%f1535, %f495, %f1512, %f1533;
	cvt.ftz.sat.f32.f32 	%f1536, %f1534;
	cvt.ftz.sat.f32.f32 	%f1537, %f1535;
	sub.ftz.f32 	%f1538, %f1536, %f1537;
	add.ftz.f32 	%f1539, %f1538, %f1512;
	mov.f32 	%f1540, %f1539;
	add.ftz.f32 	%f1541, %f1538, %f1513;
	mov.f32 	%f1542, %f1541;
	add.ftz.f32 	%f1543, %f1538, %f1517;
	mov.f32 	%f1544, %f1543;
	.loc	22	50	0
	mul.ftz.f32 	%f1545, %f1541, %f497;
	fma.rn.ftz.f32 	%f1546, %f496, %f1543, %f1545;
	fma.rn.ftz.f32 	%f1547, %f495, %f1539, %f1546;
	cvt.ftz.sat.f32.f32 	%f1548, %f1547;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p134, %f1541, %f1539;
	selp.f32 	%f1549, %f1539, %f1541, %p134;
	setp.lt.ftz.f32 	%p135, %f1549, %f1543;
	selp.f32 	%f1550, %f1549, %f1543, %p135;
	mov.f32 	%f1551, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p136, %f1550, %f1551;
	@!%p136 bra 	$Lt_126_313346;
	.loc	22	119	0
	sub.ftz.f32 	%f1552, %f1548, %f1550;
	sub.ftz.f32 	%f1553, %f1543, %f1548;
	mul.ftz.f32 	%f1554, %f1548, %f1553;
	div.approx.ftz.f32 	%f1555, %f1554, %f1552;
	add.ftz.f32 	%f1544, %f1548, %f1555;
	.loc	22	120	0
	sub.ftz.f32 	%f1556, %f1541, %f1548;
	mul.ftz.f32 	%f1557, %f1548, %f1556;
	div.approx.ftz.f32 	%f1558, %f1557, %f1552;
	add.ftz.f32 	%f1542, %f1548, %f1558;
	.loc	22	121	0
	sub.ftz.f32 	%f1559, %f1539, %f1548;
	mul.ftz.f32 	%f1560, %f1548, %f1559;
	div.approx.ftz.f32 	%f1561, %f1560, %f1552;
	add.ftz.f32 	%f1540, %f1548, %f1561;
$Lt_126_313346:
	max.ftz.f32 	%f1562, %f1541, %f1539;
	max.ftz.f32 	%f1563, %f1562, %f1543;
	mov.f32 	%f1564, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p137, %f1563, %f1564;
	@!%p137 bra 	$Lt_126_313858;
	.loc	27	529	0
	mov.f32 	%f1565, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1566, %f1565, %f1548;
	sub.ftz.f32 	%f1567, %f1563, %f1548;
	sub.ftz.f32 	%f1568, %f1544, %f1548;
	mul.ftz.f32 	%f1569, %f1566, %f1568;
	div.approx.ftz.f32 	%f1570, %f1569, %f1567;
	.loc	22	125	0
	add.ftz.f32 	%f1544, %f1570, %f1548;
	.loc	27	529	0
	sub.ftz.f32 	%f1571, %f1542, %f1548;
	mul.ftz.f32 	%f1572, %f1566, %f1571;
	div.approx.ftz.f32 	%f1573, %f1572, %f1567;
	.loc	22	126	0
	add.ftz.f32 	%f1542, %f1573, %f1548;
	.loc	27	529	0
	sub.ftz.f32 	%f1574, %f1540, %f1548;
	mul.ftz.f32 	%f1575, %f1566, %f1574;
	div.approx.ftz.f32 	%f1576, %f1575, %f1567;
	.loc	22	127	0
	add.ftz.f32 	%f1540, %f1576, %f1548;
$Lt_126_313858:
	.loc	22	468	0
	ld.param.f32 	%f1577, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1577, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1578, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1578;
	mov.f32 	%f1579, %f164;
	mov.f32 	%f1580, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1581, %f164, %f1580;
	mov.f32 	%f1582, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p138, %f1581, %f1582;
	@!%p138 bra 	$Lt_126_314626;
	mov.f32 	%f1583, 0f00000000;  	// 0
	mov.f32 	%f1584, 0f00000000;  	// 0
	mov.f32 	%f1585, 0f00000000;  	// 0
	mov.f32 	%f1579, 0f00000000;  	// 0
	bra.uni 	$Lt_126_314370;
$Lt_126_314626:
	mov.f32 	%f1586, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1586, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1587, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1588, %f161, %f174;
	sub.ftz.f32 	%f178, %f1587, %f1588;
	mul.ftz.f32 	%f1589, %f173, %f1540;
	fma.rn.ftz.f32 	%f1590, %f1540, %f38, %f1589;
	mul.ftz.f32 	%f1591, %f175, %f1590;
	fma.rn.ftz.f32 	%f1585, %f35, %f178, %f1591;
	mul.ftz.f32 	%f1592, %f173, %f1542;
	fma.rn.ftz.f32 	%f1593, %f1542, %f38, %f1592;
	mul.ftz.f32 	%f1594, %f175, %f1593;
	fma.rn.ftz.f32 	%f1584, %f36, %f178, %f1594;
	mul.ftz.f32 	%f1595, %f173, %f1544;
	fma.rn.ftz.f32 	%f1596, %f1544, %f38, %f1595;
	mul.ftz.f32 	%f1597, %f175, %f1596;
	fma.rn.ftz.f32 	%f1583, %f37, %f178, %f1597;
$Lt_126_314370:
	.loc	6	214	0
	mov.f32 	%f31, %f1585;
	mov.f32 	%f32, %f1584;
	mov.f32 	%f33, %f1583;
	mov.f32 	%f34, %f1579;
	bra.uni 	$Lt_126_317954;
$Lt_126_6658:
	.loc	22	113	0
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1598, %f32, %f497;
	mul.ftz.f32 	%f1599, %f36, %f497;
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1600, %f496, %f33, %f1598;
	fma.rn.ftz.f32 	%f1601, %f496, %f37, %f1599;
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1602, %f495, %f31, %f1600;
	fma.rn.ftz.f32 	%f1603, %f495, %f35, %f1601;
	cvt.ftz.sat.f32.f32 	%f1604, %f1602;
	cvt.ftz.sat.f32.f32 	%f1605, %f1603;
	sub.ftz.f32 	%f1606, %f1605, %f1604;
	add.ftz.f32 	%f1607, %f1606, %f31;
	mov.f32 	%f1608, %f1607;
	add.ftz.f32 	%f1609, %f1606, %f32;
	mov.f32 	%f1610, %f1609;
	add.ftz.f32 	%f1611, %f1606, %f33;
	mov.f32 	%f1612, %f1611;
	.loc	22	50	0
	mul.ftz.f32 	%f1613, %f1609, %f497;
	fma.rn.ftz.f32 	%f1614, %f496, %f1611, %f1613;
	fma.rn.ftz.f32 	%f1615, %f495, %f1607, %f1614;
	cvt.ftz.sat.f32.f32 	%f1616, %f1615;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p139, %f1609, %f1607;
	selp.f32 	%f1617, %f1607, %f1609, %p139;
	setp.lt.ftz.f32 	%p140, %f1617, %f1611;
	selp.f32 	%f1618, %f1617, %f1611, %p140;
	mov.f32 	%f1619, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p141, %f1618, %f1619;
	@!%p141 bra 	$Lt_126_314882;
	.loc	22	119	0
	sub.ftz.f32 	%f1620, %f1616, %f1618;
	sub.ftz.f32 	%f1621, %f1611, %f1616;
	mul.ftz.f32 	%f1622, %f1616, %f1621;
	div.approx.ftz.f32 	%f1623, %f1622, %f1620;
	add.ftz.f32 	%f1612, %f1616, %f1623;
	.loc	22	120	0
	sub.ftz.f32 	%f1624, %f1609, %f1616;
	mul.ftz.f32 	%f1625, %f1616, %f1624;
	div.approx.ftz.f32 	%f1626, %f1625, %f1620;
	add.ftz.f32 	%f1610, %f1616, %f1626;
	.loc	22	121	0
	sub.ftz.f32 	%f1627, %f1607, %f1616;
	mul.ftz.f32 	%f1628, %f1616, %f1627;
	div.approx.ftz.f32 	%f1629, %f1628, %f1620;
	add.ftz.f32 	%f1608, %f1616, %f1629;
$Lt_126_314882:
	max.ftz.f32 	%f1630, %f1609, %f1607;
	max.ftz.f32 	%f1631, %f1630, %f1611;
	mov.f32 	%f1632, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p142, %f1631, %f1632;
	@!%p142 bra 	$Lt_126_315394;
	.loc	27	529	0
	mov.f32 	%f1633, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1634, %f1633, %f1616;
	sub.ftz.f32 	%f1635, %f1631, %f1616;
	sub.ftz.f32 	%f1636, %f1612, %f1616;
	mul.ftz.f32 	%f1637, %f1634, %f1636;
	div.approx.ftz.f32 	%f1638, %f1637, %f1635;
	.loc	22	125	0
	add.ftz.f32 	%f1612, %f1638, %f1616;
	.loc	27	529	0
	sub.ftz.f32 	%f1639, %f1610, %f1616;
	mul.ftz.f32 	%f1640, %f1634, %f1639;
	div.approx.ftz.f32 	%f1641, %f1640, %f1635;
	.loc	22	126	0
	add.ftz.f32 	%f1610, %f1641, %f1616;
	.loc	27	529	0
	sub.ftz.f32 	%f1642, %f1608, %f1616;
	mul.ftz.f32 	%f1643, %f1634, %f1642;
	div.approx.ftz.f32 	%f1644, %f1643, %f1635;
	.loc	22	127	0
	add.ftz.f32 	%f1608, %f1644, %f1616;
$Lt_126_315394:
	.loc	22	468	0
	ld.param.f32 	%f1645, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1645, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1646, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1646;
	mov.f32 	%f1647, %f164;
	mov.f32 	%f1648, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1649, %f164, %f1648;
	mov.f32 	%f1650, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p143, %f1649, %f1650;
	@!%p143 bra 	$Lt_126_316162;
	mov.f32 	%f1651, 0f00000000;  	// 0
	mov.f32 	%f1652, 0f00000000;  	// 0
	mov.f32 	%f1653, 0f00000000;  	// 0
	mov.f32 	%f1647, 0f00000000;  	// 0
	bra.uni 	$Lt_126_315906;
$Lt_126_316162:
	mov.f32 	%f1654, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1654, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1655, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1656, %f161, %f174;
	sub.ftz.f32 	%f178, %f1655, %f1656;
	mul.ftz.f32 	%f1657, %f173, %f1608;
	fma.rn.ftz.f32 	%f1658, %f1608, %f38, %f1657;
	mul.ftz.f32 	%f1659, %f175, %f1658;
	fma.rn.ftz.f32 	%f1653, %f35, %f178, %f1659;
	mul.ftz.f32 	%f1660, %f173, %f1610;
	fma.rn.ftz.f32 	%f1661, %f1610, %f38, %f1660;
	mul.ftz.f32 	%f1662, %f175, %f1661;
	fma.rn.ftz.f32 	%f1652, %f36, %f178, %f1662;
	mul.ftz.f32 	%f1663, %f173, %f1612;
	fma.rn.ftz.f32 	%f1664, %f1612, %f38, %f1663;
	mul.ftz.f32 	%f1665, %f175, %f1664;
	fma.rn.ftz.f32 	%f1651, %f37, %f178, %f1665;
$Lt_126_315906:
	.loc	6	215	0
	mov.f32 	%f31, %f1653;
	mov.f32 	%f32, %f1652;
	mov.f32 	%f33, %f1651;
	mov.f32 	%f34, %f1647;
	bra.uni 	$Lt_126_317954;
$Lt_126_6914:
	.loc	22	113	0
	ld.const.f32 	%f497, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1666, %f32, %f497;
	mul.ftz.f32 	%f1667, %f36, %f497;
	ld.const.f32 	%f496, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1668, %f496, %f33, %f1666;
	fma.rn.ftz.f32 	%f1669, %f496, %f37, %f1667;
	ld.const.f32 	%f495, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1670, %f495, %f31, %f1668;
	fma.rn.ftz.f32 	%f1671, %f495, %f35, %f1669;
	cvt.ftz.sat.f32.f32 	%f1672, %f1670;
	cvt.ftz.sat.f32.f32 	%f1673, %f1671;
	sub.ftz.f32 	%f1674, %f1672, %f1673;
	add.ftz.f32 	%f1675, %f1674, %f35;
	mov.f32 	%f1676, %f1675;
	add.ftz.f32 	%f1677, %f1674, %f36;
	mov.f32 	%f1678, %f1677;
	add.ftz.f32 	%f1679, %f1674, %f37;
	mov.f32 	%f1680, %f1679;
	.loc	22	50	0
	mul.ftz.f32 	%f1681, %f1677, %f497;
	fma.rn.ftz.f32 	%f1682, %f496, %f1679, %f1681;
	fma.rn.ftz.f32 	%f1683, %f495, %f1675, %f1682;
	cvt.ftz.sat.f32.f32 	%f1684, %f1683;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p144, %f1677, %f1675;
	selp.f32 	%f1685, %f1675, %f1677, %p144;
	setp.lt.ftz.f32 	%p145, %f1685, %f1679;
	selp.f32 	%f1686, %f1685, %f1679, %p145;
	mov.f32 	%f1687, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p146, %f1686, %f1687;
	@!%p146 bra 	$Lt_126_316418;
	.loc	22	119	0
	sub.ftz.f32 	%f1688, %f1684, %f1686;
	sub.ftz.f32 	%f1689, %f1679, %f1684;
	mul.ftz.f32 	%f1690, %f1684, %f1689;
	div.approx.ftz.f32 	%f1691, %f1690, %f1688;
	add.ftz.f32 	%f1680, %f1684, %f1691;
	.loc	22	120	0
	sub.ftz.f32 	%f1692, %f1677, %f1684;
	mul.ftz.f32 	%f1693, %f1684, %f1692;
	div.approx.ftz.f32 	%f1694, %f1693, %f1688;
	add.ftz.f32 	%f1678, %f1684, %f1694;
	.loc	22	121	0
	sub.ftz.f32 	%f1695, %f1675, %f1684;
	mul.ftz.f32 	%f1696, %f1684, %f1695;
	div.approx.ftz.f32 	%f1697, %f1696, %f1688;
	add.ftz.f32 	%f1676, %f1684, %f1697;
$Lt_126_316418:
	max.ftz.f32 	%f1698, %f1677, %f1675;
	max.ftz.f32 	%f1699, %f1698, %f1679;
	mov.f32 	%f1700, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p147, %f1699, %f1700;
	@!%p147 bra 	$Lt_126_316930;
	.loc	27	529	0
	mov.f32 	%f1701, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1702, %f1701, %f1684;
	sub.ftz.f32 	%f1703, %f1699, %f1684;
	sub.ftz.f32 	%f1704, %f1680, %f1684;
	mul.ftz.f32 	%f1705, %f1702, %f1704;
	div.approx.ftz.f32 	%f1706, %f1705, %f1703;
	.loc	22	125	0
	add.ftz.f32 	%f1680, %f1706, %f1684;
	.loc	27	529	0
	sub.ftz.f32 	%f1707, %f1678, %f1684;
	mul.ftz.f32 	%f1708, %f1702, %f1707;
	div.approx.ftz.f32 	%f1709, %f1708, %f1703;
	.loc	22	126	0
	add.ftz.f32 	%f1678, %f1709, %f1684;
	.loc	27	529	0
	sub.ftz.f32 	%f1710, %f1676, %f1684;
	mul.ftz.f32 	%f1711, %f1702, %f1710;
	div.approx.ftz.f32 	%f1712, %f1711, %f1703;
	.loc	22	127	0
	add.ftz.f32 	%f1676, %f1712, %f1684;
$Lt_126_316930:
	.loc	22	468	0
	ld.param.f32 	%f1713, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f161, %f1713, %f34;
	add.ftz.f32 	%f162, %f161, %f38;
	mul.ftz.f32 	%f1714, %f161, %f38;
	sub.ftz.f32 	%f164, %f162, %f1714;
	mov.f32 	%f1715, %f164;
	mov.f32 	%f1716, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1717, %f164, %f1716;
	mov.f32 	%f1718, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p148, %f1717, %f1718;
	@!%p148 bra 	$Lt_126_317698;
	mov.f32 	%f1719, 0f00000000;  	// 0
	mov.f32 	%f1720, 0f00000000;  	// 0
	mov.f32 	%f1721, 0f00000000;  	// 0
	mov.f32 	%f1715, 0f00000000;  	// 0
	bra.uni 	$Lt_126_317442;
$Lt_126_317698:
	mov.f32 	%f1722, 0f3f800000;  	// 1
	sub.ftz.f32 	%f173, %f1722, %f38;
	rcp.approx.ftz.f32 	%f174, %f164;
	mul.ftz.f32 	%f175, %f174, %f161;
	mov.f32 	%f1723, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1724, %f161, %f174;
	sub.ftz.f32 	%f178, %f1723, %f1724;
	mul.ftz.f32 	%f1725, %f173, %f1676;
	fma.rn.ftz.f32 	%f1726, %f1676, %f38, %f1725;
	mul.ftz.f32 	%f1727, %f175, %f1726;
	fma.rn.ftz.f32 	%f1721, %f35, %f178, %f1727;
	mul.ftz.f32 	%f1728, %f173, %f1678;
	fma.rn.ftz.f32 	%f1729, %f1678, %f38, %f1728;
	mul.ftz.f32 	%f1730, %f175, %f1729;
	fma.rn.ftz.f32 	%f1720, %f36, %f178, %f1730;
	mul.ftz.f32 	%f1731, %f173, %f1680;
	fma.rn.ftz.f32 	%f1732, %f1680, %f38, %f1731;
	mul.ftz.f32 	%f1733, %f175, %f1732;
	fma.rn.ftz.f32 	%f1719, %f37, %f178, %f1733;
$Lt_126_317442:
	.loc	6	216	0
	mov.f32 	%f31, %f1721;
	mov.f32 	%f32, %f1720;
	mov.f32 	%f33, %f1719;
	mov.f32 	%f34, %f1715;
	bra.uni 	$Lt_126_317954;
$Lt_126_273666:
	.loc	6	218	0
	@!%p11 bra 	$Lt_126_317954;
	.loc	6	226	0
	cvt.ftz.sat.f32.f32 	%f1734, %f34;
	.loc	6	243	0
	ld.param.f32 	%f1735, [__cudaparm_cuda_motion_blitquad_alphaGain];
	mul.ftz.f32 	%f34, %f1735, %f1734;
$Lt_126_317954:
$Lt_126_273410:
	@!%p10 bra 	$Lt_126_318722;
	.loc	21	126	0
	mul.lo.u64 	%rd19, %rd13, 8;
	add.u64 	%rd20, %rd14, %rd19;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f31;
	mov.b32		%r136, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f32;
	mov.b32		%r137, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f33;
	mov.b32		%r138, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f34;
	mov.b32		%r139, %b1; }
	st.global.v4.u16 	[%rd20+0], {%r136,%r137,%r138,%r139};
	.loc	6	246	0
	bra.uni 	$LBB358_cuda_motion_blitquad;
$Lt_126_318722:
	.loc	21	126	0
	mul.lo.u64 	%rd21, %rd13, 16;
	add.u64 	%rd22, %rd14, %rd21;
	st.global.v4.f32 	[%rd22+0], {%f31,%f32,%f33,%f34};
$LBB358_cuda_motion_blitquad:
	.loc	6	585	0
	exit;
$LDWend_cuda_motion_blitquad:
	} // cuda_motion_blitquad

	// -----------------------------------------------------------------------
	// cuda_motion_prepsource
	//
	// One thread handles one pixel at
	//     x = ctaid.x * ntid.x + tid.x
	//     y = ctaid.y * ntid.y + tid.y
	// Threads for which (width > x) && (height > y) is false exit without
	// reading or writing memory.
	//
	// Per pixel:
	//   1. Load four components c0..c3 from srcFrame at pixel index
	//      srcPitch * y + x.
	//   2. For each of c0, c1, c2 compute a sign-preserving power 2.2 using the
	//      approximate lg2/ex2 instructions:
	//          c <  0 : -ex2(2.2 * lg2(-c))
	//          else   :  ex2(2.2 * lg2(c))
	//      NOTE(review): presumably a gamma-2.2 decode -- confirm against the
	//      CUDA source.
	//   3. a = c3 clamped to [0, 1] (cvt .sat).
	//   4. Store { p0*a, p1*a, p2*a, a } to dstFrame at pixel index
	//      dstPitch * y + x.
	//
	// Parameters:
	//   srcFrame / dstFrame : 64-bit base addresses, accessed with
	//                         ld.global / st.global.
	//   inDeviceFormat      : 0  -> pixels are 4 x f16 (index scaled by 8 bytes)
	//                         !0 -> pixels are 4 x f32 (index scaled by 16 bytes)
	//                         The same selection is used for load and store.
	//   width / height      : bounds used for the per-thread guard.
	//   srcPitch / dstPitch : row stride in pixels (the index is multiplied by
	//                         the pixel size afterwards, not the pitch).
	//
	// NOTE(review): pitch * y + x is evaluated in 32-bit signed arithmetic and
	// only then sign-extended to 64 bits, so it wraps for indices that do not
	// fit in an s32.
	// -----------------------------------------------------------------------
	.entry cuda_motion_prepsource (
		.param .u64 __cudaparm_cuda_motion_prepsource_srcFrame,
		.param .u64 __cudaparm_cuda_motion_prepsource_dstFrame,
		.param .u32 __cudaparm_cuda_motion_prepsource_inDeviceFormat,
		.param .s32 __cudaparm_cuda_motion_prepsource_width,
		.param .s32 __cudaparm_cuda_motion_prepsource_height,
		.param .s32 __cudaparm_cuda_motion_prepsource_srcPitch,
		.param .s32 __cudaparm_cuda_motion_prepsource_dstPitch)
	{
	.reg .u32 %r<36>;
	.reg .u64 %rd<14>;
	.reg .f32 %f<44>;
	.reg .pred %p<7>;
	.loc	6	588	0
$LDWbegin_cuda_motion_prepsource:
	.loc	6	591	0
	// %r8 = x, %r10 = y (global pixel coordinates of this thread).
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	cvt.s32.u32 	%r4, %ctaid.y;
	cvt.s32.u32 	%r5, %ntid.y;
	mul.lo.s32 	%r6, %r4, %r5;
	mov.u32 	%r7, %tid.x;
	add.u32 	%r8, %r3, %r7;
	mov.u32 	%r9, %tid.y;
	add.u32 	%r10, %r6, %r9;
	// Bounds guard. `set` yields an all-ones/zero mask, `neg` turns it into 1/0,
	// and the AND is non-zero only when width > x and height > y (signed compares).
	ld.param.s32 	%r11, [__cudaparm_cuda_motion_prepsource_width];
	set.gt.u32.s32 	%r12, %r11, %r8;
	neg.s32 	%r13, %r12;
	ld.param.s32 	%r14, [__cudaparm_cuda_motion_prepsource_height];
	set.gt.u32.s32 	%r15, %r14, %r10;
	neg.s32 	%r16, %r15;
	and.b32 	%r17, %r13, %r16;
	mov.u32 	%r18, 0;
	setp.eq.s32 	%p1, %r17, %r18;
	// Out-of-range thread: jump straight to exit.
	@%p1 bra 	$Lt_127_6658;
	// %p2 = (inDeviceFormat == 0); selects the f16 path here and again at the store.
	// The parameter is declared .u32 but loaded as .s32; it is only tested for
	// equality with 0, so the signedness does not affect the result.
	ld.param.s32 	%r19, [__cudaparm_cuda_motion_prepsource_inDeviceFormat];
	mov.s32 	%r20, 0;
	setp.eq.s32 	%p2, %r19, %r20;
	// %rd1 = sign-extended source pixel index srcPitch * y + x (32-bit math).
	ld.param.s32 	%r21, [__cudaparm_cuda_motion_prepsource_srcPitch];
	mul.lo.s32 	%r22, %r21, %r10;
	add.s32 	%r23, %r8, %r22;
	cvt.s64.s32 	%rd1, %r23;
	ld.param.u64 	%rd2, [__cudaparm_cuda_motion_prepsource_srcFrame];
	@!%p2 bra 	$Lt_127_7426;
	.loc	21	115	0
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.lo.u64 	%rd3, %rd1, 8;
	add.u64 	%rd4, %rd2, %rd3;
	ld.global.v4.u16 	{%r24,%r25,%r26,%r27}, [%rd4+0];
	.loc	6	595	0
	// Widen each f16 component to f32: %f1..%f4 = c0..c3.
	{ .reg .b32 %b1;
	mov.b32		%b1, %r24;
	cvt.ftz.f32.f16	%f1, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r25;
	cvt.ftz.f32.f16	%f2, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r26;
	cvt.ftz.f32.f16	%f3, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r27;
	cvt.ftz.f32.f16	%f4, %b1; }
	bra.uni 	$Lt_127_7170;
$Lt_127_7426:
	// f32 source: 16 bytes per pixel, loaded directly into %f1..%f4.
	mul.lo.u64 	%rd5, %rd1, 16;
	add.u64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f1,%f2,%f3,%f4}, [%rd6+0];
$Lt_127_7170:
	.loc	6	597	0
	// ---- component c2 (%f3) -> %f11 -------------------------------------
	mov.f32 	%f5, 0f00000000;     	// 0
	setp.lt.ftz.f32 	%p3, %f3, %f5;
	@!%p3 bra 	$Lt_127_7682;
	.loc	5	234	0
	// c2 < 0: %f11 = -ex2(2.2 * lg2(-c2))
	neg.ftz.f32 	%f6, %f3;
	lg2.approx.ftz.f32 	%f7, %f6;
	mov.f32 	%f8, 0f400ccccd;     	// 2.2
	mul.ftz.f32 	%f9, %f7, %f8;
	ex2.approx.ftz.f32 	%f10, %f9;
	neg.ftz.f32 	%f11, %f10;
	bra.uni 	$LDWendi___log2f_304_5;
$Lt_127_7682:
	.loc	5	236	0
	// c2 not less than 0: %f11 = ex2(2.2 * lg2(c2))
	lg2.approx.ftz.f32 	%f12, %f3;
	mov.f32 	%f13, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f14, %f12, %f13;
	ex2.approx.ftz.f32 	%f11, %f14;
$LDWendi___log2f_304_5:
	.loc	6	598	0
	// ---- component c1 (%f2) -> %f21 -------------------------------------
	mov.f32 	%f15, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p4, %f2, %f15;
	@!%p4 bra 	$Lt_127_8194;
	.loc	5	234	0
	// c1 < 0: %f21 = -ex2(2.2 * lg2(-c1))
	neg.ftz.f32 	%f16, %f2;
	lg2.approx.ftz.f32 	%f17, %f16;
	mov.f32 	%f18, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f19, %f17, %f18;
	ex2.approx.ftz.f32 	%f20, %f19;
	neg.ftz.f32 	%f21, %f20;
	bra.uni 	$LDWendi___log2f_304_3;
$Lt_127_8194:
	.loc	5	236	0
	// c1 not less than 0: %f21 = ex2(2.2 * lg2(c1))
	lg2.approx.ftz.f32 	%f22, %f2;
	mov.f32 	%f23, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f24, %f22, %f23;
	ex2.approx.ftz.f32 	%f21, %f24;
$LDWendi___log2f_304_3:
	.loc	6	599	0
	// ---- component c0 (%f1) -> %f31 -------------------------------------
	mov.f32 	%f25, 0f00000000;    	// 0
	setp.lt.ftz.f32 	%p5, %f1, %f25;
	@!%p5 bra 	$Lt_127_8706;
	.loc	5	234	0
	// c0 < 0: %f31 = -ex2(2.2 * lg2(-c0))
	neg.ftz.f32 	%f26, %f1;
	lg2.approx.ftz.f32 	%f27, %f26;
	mov.f32 	%f28, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f29, %f27, %f28;
	ex2.approx.ftz.f32 	%f30, %f29;
	neg.ftz.f32 	%f31, %f30;
	bra.uni 	$LDWendi___log2f_304_1;
$Lt_127_8706:
	.loc	5	236	0
	// c0 not less than 0: %f31 = ex2(2.2 * lg2(c0))
	lg2.approx.ftz.f32 	%f32, %f1;
	mov.f32 	%f33, 0f400ccccd;    	// 2.2
	mul.ftz.f32 	%f34, %f32, %f33;
	ex2.approx.ftz.f32 	%f31, %f34;
$LDWendi___log2f_304_1:
	.loc	6	599	0
	// %rd7 = sign-extended destination pixel index dstPitch * y + x (32-bit math).
	ld.param.s32 	%r28, [__cudaparm_cuda_motion_prepsource_dstPitch];
	mul.lo.s32 	%r29, %r28, %r10;
	add.s32 	%r30, %r8, %r29;
	cvt.s64.s32 	%rd7, %r30;
	ld.param.u64 	%rd8, [__cudaparm_cuda_motion_prepsource_dstFrame];
	// Same format predicate as the load: %p2 true -> f16 store, false -> f32 store.
	@!%p2 bra 	$Lt_127_9474;
	.loc	21	126	0
	// f16 destination: a = sat(c3); each powered component is multiplied by a,
	// rounded to f16 (round-to-nearest), and the four halves are stored together.
	cvt.ftz.sat.f32.f32 	%f35, %f4;
	mul.lo.u64 	%rd9, %rd7, 8;
	add.u64 	%rd10, %rd8, %rd9;
	mul.ftz.f32 	%f36, %f31, %f35;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f36;
	mov.b32		%r31, %b1; }
	mul.ftz.f32 	%f37, %f21, %f35;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f37;
	mov.b32		%r32, %b1; }
	mul.ftz.f32 	%f38, %f11, %f35;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f38;
	mov.b32		%r33, %b1; }
	// Fourth component is the clamped value itself (not multiplied).
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f35;
	mov.b32		%r34, %b1; }
	st.global.v4.u16 	[%rd10+0], {%r31,%r32,%r33,%r34};
	.loc	6	600	0
	bra.uni 	$Lt_127_9218;
$Lt_127_9474:
	.loc	21	126	0
	// f32 destination: same arithmetic, stored as four f32 values (16 bytes/pixel).
	cvt.ftz.sat.f32.f32 	%f39, %f4;
	mul.lo.u64 	%rd11, %rd7, 16;
	add.u64 	%rd12, %rd8, %rd11;
	mul.ftz.f32 	%f40, %f31, %f39;
	mul.ftz.f32 	%f41, %f21, %f39;
	mul.ftz.f32 	%f42, %f11, %f39;
	st.global.v4.f32 	[%rd12+0], {%f40,%f41,%f42,%f39};
$Lt_127_9218:
$Lt_127_6658:
	// Common exit for both store paths and for threads rejected by the bounds guard.
	.loc	6	602	0
	exit;
$LDWend_cuda_motion_prepsource:
	} // cuda_motion_prepsource
	// Texture reference read by HorizontalOnlyScaleKernel: the kernel issues four
	// tex.2d.v4.f32.f32 fetches from it per output pixel, one per horizontal filter tap.
	.global .texref sHorizontalOnlyScaleTexture;

	// HorizontalOnlyScaleKernel
	//   Resamples sHorizontalOnlyScaleTexture along X with a 4-tap weighted sum and
	//   then writes or blends the result into inDest.
	//   Parameters, as used by the code of this kernel:
	//     inSrcWidth        source X at or beyond (float)inSrcWidth produces a zero pixel
	//     inSrcHeight       NOTE(review): not read in the reviewed portion of the kernel
	//     inDest            base address of the destination buffer
	//     inDestPitch       destination row pitch in pixels (index = x + pitch * y, then
	//                       scaled by 8 or 16 bytes depending on inDeviceFormat)
	//     inDestWidth/Height  threads with x >= width or y >= height skip the pixel work
	//     inDeviceFormat    0 selects 4 x f16 pixels (8 bytes); otherwise 4 x f32 (16 bytes)
	//     inPositionX, inAnchorX, inRecipScaleX
	//                       srcX = inRecipScaleX * (inPositionX + x) + inAnchorX
	//     inTransformY      added to the output row to form the texture Y coordinate
	//     inAlphaGain       multiplies the source fourth component in the blend paths
	//     inBlendMode       integer selector tested against fixed constants
	//     inDoCompositeOver zero skips the destination read/blend path
	.entry HorizontalOnlyScaleKernel (
		.param .s32 __cudaparm_HorizontalOnlyScaleKernel_inSrcWidth,
		.param .s32 __cudaparm_HorizontalOnlyScaleKernel_inSrcHeight,
		.param .u64 __cudaparm_HorizontalOnlyScaleKernel_inDest,
		.param .s32 __cudaparm_HorizontalOnlyScaleKernel_inDestPitch,
		.param .s32 __cudaparm_HorizontalOnlyScaleKernel_inDestWidth,
		.param .s32 __cudaparm_HorizontalOnlyScaleKernel_inDestHeight,
		.param .u32 __cudaparm_HorizontalOnlyScaleKernel_inDeviceFormat,
		.param .f32 __cudaparm_HorizontalOnlyScaleKernel_inPositionX,
		.param .f32 __cudaparm_HorizontalOnlyScaleKernel_inAnchorX,
		.param .f32 __cudaparm_HorizontalOnlyScaleKernel_inRecipScaleX,
		.param .f32 __cudaparm_HorizontalOnlyScaleKernel_inTransformY,
		.param .f32 __cudaparm_HorizontalOnlyScaleKernel_inAlphaGain,
		.param .u32 __cudaparm_HorizontalOnlyScaleKernel_inBlendMode,
		.param .s8 __cudaparm_HorizontalOnlyScaleKernel_inDoCompositeOver)
	{
	.reg .u32 %r<115>;
	.reg .u64 %rd<22>;
	.reg .f32 %f<1885>;
	.reg .pred %p<152>;
	// 256 bytes = 64 f32 filter weights, stored below at float index tid.x + 16 * tid.y.
	// NOTE(review): the 16-float row stride and 4 rows imply blockDim.x <= 16 -- confirm
	// against the launch configuration.
	.shared .align 4 .b8 __cuda___cuda_local_var_304428_31_non_const_Xcoeffs2348[256];
	.loc	6	643	0
$LDWbegin_HorizontalOnlyScaleKernel:
	.loc	6	649	0
	// %r5 = ctaid.x * ntid.x + tid.x  (output column x); %r4 = tid.x; %r6 = tid.y.
	cvt.s32.u32 	%r1, %ctaid.x;
	cvt.s32.u32 	%r2, %ntid.x;
	mul.lo.s32 	%r3, %r1, %r2;
	mov.u32 	%r4, %tid.x;
	add.u32 	%r5, %r3, %r4;
	mov.u32 	%r6, %tid.y;
	// Only threads with tid.y <= 3 compute a filter weight; tid.y is the tap number.
	mov.u32 	%r7, 3;
	setp.gt.u32 	%p1, %r6, %r7;
	@%p1 bra 	$Lt_128_268802;
	.loc	6	658	0
	// %f7  = srcX = inRecipScaleX * (inPositionX + x) + inAnchorX
	// %f12 = floor(srcX - 1.5)   (cvt.rmi rounds toward minus infinity)
	// %f15 = | (srcX - 0.5) - (tid.y + %f12) |   distance from this tap to the sample point
	cvt.rn.f32.u32 	%f1, %r6;
	cvt.rn.f32.s32 	%f2, %r5;
	ld.param.f32 	%f3, [__cudaparm_HorizontalOnlyScaleKernel_inPositionX];
	add.ftz.f32 	%f4, %f3, %f2;
	ld.param.f32 	%f5, [__cudaparm_HorizontalOnlyScaleKernel_inAnchorX];
	ld.param.f32 	%f6, [__cudaparm_HorizontalOnlyScaleKernel_inRecipScaleX];
	fma.rn.ftz.f32 	%f7, %f6, %f4, %f5;
	mov.f32 	%f8, 0fbfc00000;     	// -1.5
	add.ftz.f32 	%f9, %f7, %f8;
	mov.f32 	%f10, 0fbf000000;    	// -0.5
	add.ftz.f32 	%f11, %f7, %f10;
	cvt.rmi.ftz.f32.f32 	%f12, %f9;
	add.ftz.f32 	%f13, %f1, %f12;
	sub.ftz.f32 	%f14, %f11, %f13;
	abs.ftz.f32 	%f15, %f14;
	// Distance >= 2: weight is 0.
	mov.f32 	%f16, 0f40000000;    	// 2
	setp.ge.ftz.f32 	%p2, %f15, %f16;
	@!%p2 bra 	$Lt_128_269570;
	mov.f32 	%f17, 0f00000000;    	// 0
	bra.uni 	$Lt_128_269314;
$Lt_128_269570:
	.loc	6	618	0
	// Piecewise cubic in d = %f15, both pieces evaluated and then selected:
	//   d >= 1 : %f25 = ((-0.6 * (d - 5)) * d - 4.8) * d + 2.4
	//   d <  1 : %f31 = ((1.4 * d - 2.4) * d) * d + 1
	mov.f32 	%f18, 0f4019999a;    	// 2.4
	mov.f32 	%f19, 0fc099999a;    	// -4.8
	mov.f32 	%f20, 0fc0a00000;    	// -5
	add.ftz.f32 	%f21, %f15, %f20;
	mov.f32 	%f22, 0fbf19999a;    	// -0.6
	mul.ftz.f32 	%f23, %f21, %f22;
	fma.rn.ftz.f32 	%f24, %f15, %f23, %f19;
	fma.rn.ftz.f32 	%f25, %f15, %f24, %f18;
	mov.f32 	%f26, 0f3f800000;    	// 1
	mov.f32 	%f27, 0fc019999a;    	// -2.4
	mov.f32 	%f28, 0f3fb33333;    	// 1.4
	fma.rn.ftz.f32 	%f29, %f28, %f15, %f27;
	mul.ftz.f32 	%f30, %f15, %f29;
	fma.rn.ftz.f32 	%f31, %f15, %f30, %f26;
	mov.f32 	%f32, 0f3f800000;    	// 1
	setp.ge.ftz.f32 	%p3, %f15, %f32;
	selp.f32 	%f17, %f25, %f31, %p3;
$Lt_128_269314:
	.loc	6	658	0
	// Store the weight at byte offset (tid.x + 16 * tid.y) * 4 in the shared table.
	mov.u64 	%rd1, __cuda___cuda_local_var_304428_31_non_const_Xcoeffs2348;
	cvt.u64.u32 	%rd2, %r4;
	cvt.u64.u32 	%rd3, %r6;
	mul.wide.u32 	%rd4, %r6, 16;
	add.u64 	%rd5, %rd2, %rd4;
	mul.lo.u64 	%rd6, %rd5, 4;
	add.u64 	%rd7, %rd1, %rd6;
	st.shared.f32 	[%rd7+0], %f17;
$Lt_128_268802:
	// %rd1 is (re)loaded here so it is defined on the path that skipped the store.
	mov.u64 	%rd1, __cuda___cuda_local_var_304428_31_non_const_Xcoeffs2348;
	.loc	6	661	0
	// Barrier 0 is reached on both paths; the weight table is read after it.
	bar.sync 	0;
	// %r11 = ctaid.y * ntid.y + tid.y  (output row y).
	cvt.s32.u32 	%r8, %ctaid.y;
	cvt.s32.u32 	%r9, %ntid.y;
	mul.lo.s32 	%r10, %r8, %r9;
	add.u32 	%r11, %r10, %r6;
	// In-bounds test: (inDestHeight > y) AND (inDestWidth > x), both signed compares.
	// set.* yields an all-ones mask when true; neg turns that into 1 before the AND.
	ld.param.s32 	%r12, [__cudaparm_HorizontalOnlyScaleKernel_inDestWidth];
	ld.param.s32 	%r13, [__cudaparm_HorizontalOnlyScaleKernel_inDestHeight];
	set.gt.u32.s32 	%r14, %r13, %r11;
	neg.s32 	%r15, %r14;
	set.gt.u32.s32 	%r16, %r12, %r5;
	neg.s32 	%r17, %r16;
	and.b32 	%r18, %r15, %r17;
	mov.u32 	%r19, 0;
	setp.eq.s32 	%p4, %r18, %r19;
	// Out-of-bounds threads leave the pixel path here.
	@%p4 bra 	$Lt_128_317954;
	.loc	6	666	0
	// %f7 = srcX = inRecipScaleX * (inPositionX + x) + inAnchorX (recomputed).
	cvt.rn.f32.s32 	%f33, %r5;
	ld.param.f32 	%f34, [__cudaparm_HorizontalOnlyScaleKernel_inPositionX];
	add.ftz.f32 	%f35, %f34, %f33;
	ld.param.f32 	%f36, [__cudaparm_HorizontalOnlyScaleKernel_inAnchorX];
	ld.param.f32 	%f37, [__cudaparm_HorizontalOnlyScaleKernel_inRecipScaleX];
	fma.rn.ftz.f32 	%f7, %f37, %f35, %f36;
	// srcX must satisfy 0 <= srcX < (float)inSrcWidth; otherwise the result is all zeros.
	mov.f32 	%f38, 0f00000000;    	// 0
	setp.ge.ftz.f32 	%p5, %f7, %f38;
	@!%p5 bra 	$Lt_128_318466;
	ld.param.s32 	%r20, [__cudaparm_HorizontalOnlyScaleKernel_inSrcWidth];
	cvt.rn.f32.s32 	%f39, %r20;
	setp.gt.ftz.f32 	%p6, %f39, %f7;
	@!%p6 bra 	$Lt_128_318722;
	// %f42 = texture Y = (float)y + inTransformY; %f12 = floor(srcX - 1.5).
	// Tap 0 is fetched at X = %f12 + 0.5.
	cvt.rn.f32.s32 	%f40, %r11;
	ld.param.f32 	%f41, [__cudaparm_HorizontalOnlyScaleKernel_inTransformY];
	add.ftz.f32 	%f42, %f40, %f41;
	mov.f32 	%f43, 0fbfc00000;    	// -1.5
	add.ftz.f32 	%f44, %f7, %f43;
	cvt.rmi.ftz.f32.f32 	%f12, %f44;
	mov.f32 	%f45, 0f3f000000;    	// 0.5
	add.ftz.f32 	%f46, %f12, %f45;
	mov.f32 	%f47, %f46;
	mov.f32 	%f48, %f42;
	mov.f32 	%f49, 0f00000000;    	// 0
	mov.f32 	%f50, %f49;
	mov.f32 	%f51, 0f00000000;    	// 0
	mov.f32 	%f52, %f51;
	tex.2d.v4.f32.f32 {%f53,%f54,%f55,%f56},[sHorizontalOnlyScaleTexture,{%f47,%f48,%f50,%f52}];
	.loc	6	672	0
	// Tap 0 texel -> %f57..%f60. Tap 1 is fetched at X = %f12 + 1.5.
	mov.f32 	%f57, %f53;
	mov.f32 	%f58, %f54;
	mov.f32 	%f59, %f55;
	mov.f32 	%f60, %f56;
	mov.f32 	%f61, 0f3fc00000;    	// 1.5
	add.ftz.f32 	%f62, %f12, %f61;
	mov.f32 	%f63, %f62;
	mov.f32 	%f64, %f42;
	mov.f32 	%f65, 0f00000000;    	// 0
	mov.f32 	%f66, %f65;
	mov.f32 	%f67, 0f00000000;    	// 0
	mov.f32 	%f68, %f67;
	tex.2d.v4.f32.f32 {%f69,%f70,%f71,%f72},[sHorizontalOnlyScaleTexture,{%f63,%f64,%f66,%f68}];
	.loc	6	673	0
	// Tap 1 texel -> %f73..%f76. Tap 2 is fetched at X = %f12 + 2.5.
	mov.f32 	%f73, %f69;
	mov.f32 	%f74, %f70;
	mov.f32 	%f75, %f71;
	mov.f32 	%f76, %f72;
	mov.f32 	%f77, 0f40200000;    	// 2.5
	add.ftz.f32 	%f78, %f12, %f77;
	mov.f32 	%f79, %f78;
	mov.f32 	%f80, %f42;
	mov.f32 	%f81, 0f00000000;    	// 0
	mov.f32 	%f82, %f81;
	mov.f32 	%f83, 0f00000000;    	// 0
	mov.f32 	%f84, %f83;
	tex.2d.v4.f32.f32 {%f85,%f86,%f87,%f88},[sHorizontalOnlyScaleTexture,{%f79,%f80,%f82,%f84}];
	.loc	6	674	0
	// Tap 2 texel -> %f89..%f92. Tap 3 is fetched at X = %f12 + 3.5.
	mov.f32 	%f89, %f85;
	mov.f32 	%f90, %f86;
	mov.f32 	%f91, %f87;
	mov.f32 	%f92, %f88;
	mov.f32 	%f93, 0f40600000;    	// 3.5
	add.ftz.f32 	%f94, %f12, %f93;
	mov.f32 	%f95, %f94;
	mov.f32 	%f96, %f42;
	mov.f32 	%f97, 0f00000000;    	// 0
	mov.f32 	%f98, %f97;
	mov.f32 	%f99, 0f00000000;    	// 0
	mov.f32 	%f100, %f99;
	tex.2d.v4.f32.f32 {%f101,%f102,%f103,%f104},[sHorizontalOnlyScaleTexture,{%f95,%f96,%f98,%f100}];
	.loc	6	675	0
	// Tap 3 texel -> %f105..%f108.
	mov.f32 	%f105, %f101;
	mov.f32 	%f106, %f102;
	mov.f32 	%f107, %f103;
	mov.f32 	%f108, %f104;
	.loc	6	677	0
	// %rd10 = shared table base + tid.x * 4. The four weights for this column sit at
	// byte offsets +0, +64, +128, +192 (table rows 0..3).
	cvt.u64.u32 	%rd8, %r4;
	mul.wide.u32 	%rd9, %r4, 4;
	add.u64 	%rd10, %rd1, %rd9;
	ld.shared.f32 	%f109, [%rd10+0];
	.loc	6	679	0
	// Accumulate weight * texel per component, and the running sum of weights.
	mul.ftz.f32 	%f110, %f109, %f57;
	.loc	6	680	0
	mul.ftz.f32 	%f111, %f109, %f58;
	.loc	6	681	0
	mul.ftz.f32 	%f112, %f109, %f59;
	.loc	6	682	0
	mul.ftz.f32 	%f113, %f109, %f60;
	.loc	6	684	0
	ld.shared.f32 	%f114, [%rd10+64];
	.loc	6	685	0
	add.ftz.f32 	%f115, %f109, %f114;
	.loc	6	686	0
	fma.rn.ftz.f32 	%f116, %f73, %f114, %f110;
	.loc	6	687	0
	fma.rn.ftz.f32 	%f117, %f114, %f74, %f111;
	.loc	6	688	0
	fma.rn.ftz.f32 	%f118, %f114, %f75, %f112;
	.loc	6	689	0
	fma.rn.ftz.f32 	%f119, %f114, %f76, %f113;
	.loc	6	691	0
	ld.shared.f32 	%f120, [%rd10+128];
	.loc	6	692	0
	add.ftz.f32 	%f121, %f120, %f115;
	.loc	6	693	0
	fma.rn.ftz.f32 	%f122, %f89, %f120, %f116;
	.loc	6	694	0
	fma.rn.ftz.f32 	%f123, %f120, %f90, %f117;
	.loc	6	695	0
	fma.rn.ftz.f32 	%f124, %f120, %f91, %f118;
	.loc	6	696	0
	fma.rn.ftz.f32 	%f125, %f120, %f92, %f119;
	.loc	6	698	0
	ld.shared.f32 	%f126, [%rd10+192];
	.loc	6	699	0
	add.ftz.f32 	%f127, %f126, %f121;
	.loc	6	700	0
	fma.rn.ftz.f32 	%f128, %f105, %f126, %f122;
	.loc	6	701	0
	fma.rn.ftz.f32 	%f129, %f126, %f106, %f123;
	.loc	6	702	0
	fma.rn.ftz.f32 	%f130, %f126, %f107, %f124;
	.loc	6	703	0
	fma.rn.ftz.f32 	%f131, %f126, %f108, %f125;
	.loc	6	706	0
	// Normalise: multiply each accumulated component by the approximate reciprocal of the
	// weight sum (%f127). Result is %f133..%f136.
	rcp.approx.ftz.f32 	%f132, %f127;
	mul.ftz.f32 	%f133, %f132, %f128;
	.loc	6	707	0
	mul.ftz.f32 	%f134, %f132, %f129;
	.loc	6	708	0
	mul.ftz.f32 	%f135, %f132, %f130;
	.loc	6	709	0
	mul.ftz.f32 	%f136, %f132, %f131;
	bra.uni 	$L_128_268290;
$Lt_128_318466:
	// srcX < 0: result is all zeros.
	mov.f32 	%f136, 0f00000000;   	// 0
	mov.f32 	%f135, 0f00000000;   	// 0
	mov.f32 	%f134, 0f00000000;   	// 0
	mov.f32 	%f133, 0f00000000;   	// 0
	bra.uni 	$L_128_268290;
$Lt_128_318722:
	// srcX >= inSrcWidth: result is all zeros.
	mov.f32 	%f136, 0f00000000;   	// 0
	mov.f32 	%f135, 0f00000000;   	// 0
	mov.f32 	%f134, 0f00000000;   	// 0
	mov.f32 	%f133, 0f00000000;   	// 0
$L_128_268290:
	// Copy the resampled pixel into the working registers %f137..%f140 and set up:
	//   %p7  = (inBlendMode == 18)
	//   %p8  = (inDeviceFormat == 0)
	//   %rd11 = x + inDestPitch * y   (pixel index, sign-extended to 64 bits)
	//   %rd12 = inDest
	.loc	6	712	0
	mov.f32 	%f137, %f133;
	mov.f32 	%f138, %f134;
	mov.f32 	%f139, %f135;
	mov.f32 	%f140, %f136;
	ld.param.u32 	%r21, [__cudaparm_HorizontalOnlyScaleKernel_inBlendMode];
	mov.s32 	%r22, 18;
	setp.eq.s32 	%p7, %r21, %r22;
	ld.param.u32 	%r23, [__cudaparm_HorizontalOnlyScaleKernel_inDeviceFormat];
	mov.s32 	%r24, 0;
	setp.eq.s32 	%p8, %r23, %r24;
	ld.param.s32 	%r25, [__cudaparm_HorizontalOnlyScaleKernel_inDestPitch];
	mul.lo.s32 	%r26, %r25, %r11;
	add.s32 	%r27, %r5, %r26;
	cvt.s64.s32 	%rd11, %r27;
	ld.param.u64 	%rd12, [__cudaparm_HorizontalOnlyScaleKernel_inDest];
	// inDoCompositeOver == 0 bypasses the destination read and the blend code that follows.
	ld.param.s8 	%r28, [__cudaparm_HorizontalOnlyScaleKernel_inDoCompositeOver];
	mov.u32 	%r29, 0;
	setp.eq.s32 	%p9, %r28, %r29;
	@%p9 bra 	$Lt_128_270594;
	@!%p8 bra 	$Lt_128_271106;
	.loc	21	115	0
	// Format 0: destination pixel is 4 x 16-bit (8 bytes); each half is widened to f32.
	mul.lo.u64 	%rd13, %rd11, 8;
	add.u64 	%rd14, %rd12, %rd13;
	ld.global.v4.u16 	{%r30,%r31,%r32,%r33}, [%rd14+0];
	.loc	6	166	0
	{ .reg .b32 %b1;
	mov.b32		%b1, %r30;
	cvt.ftz.f32.f16	%f141, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r31;
	cvt.ftz.f32.f16	%f142, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r32;
	cvt.ftz.f32.f16	%f143, %b1; }
	{ .reg .b32 %b1;
	mov.b32		%b1, %r33;
	cvt.ftz.f32.f16	%f144, %b1; }
	bra.uni 	$Lt_128_270850;
$Lt_128_271106:
	// Other formats: destination pixel is 4 x f32 (16 bytes), loaded directly.
	mul.lo.u64 	%rd15, %rd11, 16;
	add.u64 	%rd16, %rd12, %rd15;
	ld.global.v4.f32 	{%f141,%f142,%f143,%f144}, [%rd16+0];
$Lt_128_270850:
	// Path taken when inBlendMode == 18 (%p7). Inputs: source pixel %f137..%f140,
	// destination pixel %f141..%f144 (fourth component used as alpha).
	//   %f146 = inAlphaGain * srcA
	//   %f150 = sat(dstA) * (1 - %f146)
	//   %f151 = %f150 + %f146                      (combined alpha)
	@!%p7 bra 	$Lt_128_271618;
	.loc	6	170	0
	ld.param.f32 	%f145, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f146, %f145, %f140;
	cvt.ftz.sat.f32.f32 	%f147, %f144;
	mov.f32 	%f148, 0f3f800000;   	// 1
	sub.ftz.f32 	%f149, %f148, %f146;
	mul.ftz.f32 	%f150, %f147, %f149;
	add.ftz.f32 	%f151, %f150, %f146;
	// Combined alpha <= 8e-6: the result is an all-zero pixel.
	mov.f32 	%f152, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f153, %f151, %f152;
	mov.f32 	%f154, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p10, %f153, %f154;
	@!%p10 bra 	$Lt_128_272130;
	mov.f32 	%f155, 0f00000000;   	// 0
	mov.f32 	%f156, 0f00000000;   	// 0
	mov.f32 	%f157, 0f00000000;   	// 0
	mov.f32 	%f158, 0f00000000;   	// 0
	bra.uni 	$Lt_128_271874;
$Lt_128_272130:
	// Raise each destination colour component to the power 2.2 via ex2(2.2 * lg2(|c|)),
	// restoring the sign for negative inputs. Results: %f165, %f175, %f185.
	mov.f32 	%f159, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p11, %f141, %f159;
	@!%p11 bra 	$Lt_128_272386;
	.loc	5	234	0
	neg.ftz.f32 	%f160, %f141;
	lg2.approx.ftz.f32 	%f161, %f160;
	mov.f32 	%f162, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f163, %f161, %f162;
	ex2.approx.ftz.f32 	%f164, %f163;
	neg.ftz.f32 	%f165, %f164;
	bra.uni 	$LDWendi___log2f_305_74;
$Lt_128_272386:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f166, %f141;
	mov.f32 	%f167, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f168, %f166, %f167;
	ex2.approx.ftz.f32 	%f165, %f168;
$LDWendi___log2f_305_74:
	.loc	22	97	0
	mov.f32 	%f169, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p12, %f142, %f169;
	@!%p12 bra 	$Lt_128_272898;
	.loc	5	234	0
	neg.ftz.f32 	%f170, %f142;
	lg2.approx.ftz.f32 	%f171, %f170;
	mov.f32 	%f172, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f173, %f171, %f172;
	ex2.approx.ftz.f32 	%f174, %f173;
	neg.ftz.f32 	%f175, %f174;
	bra.uni 	$LDWendi___log2f_305_72;
$Lt_128_272898:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f176, %f142;
	mov.f32 	%f177, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f178, %f176, %f177;
	ex2.approx.ftz.f32 	%f175, %f178;
$LDWendi___log2f_305_72:
	.loc	22	98	0
	mov.f32 	%f179, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p13, %f143, %f179;
	@!%p13 bra 	$Lt_128_273410;
	.loc	5	234	0
	neg.ftz.f32 	%f180, %f143;
	lg2.approx.ftz.f32 	%f181, %f180;
	mov.f32 	%f182, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f183, %f181, %f182;
	ex2.approx.ftz.f32 	%f184, %f183;
	neg.ftz.f32 	%f185, %f184;
	bra.uni 	$LDWendi___log2f_305_70;
$Lt_128_273410:
	.loc	5	236	0
	lg2.approx.ftz.f32 	%f186, %f143;
	mov.f32 	%f187, 0f400ccccd;   	// 2.2
	mul.ftz.f32 	%f188, %f186, %f187;
	ex2.approx.ftz.f32 	%f185, %f188;
$LDWendi___log2f_305_70:
	.loc	5	208	0
	// %f189 = sat(combined alpha); if it is <= 8e-6 the colour and alpha are zeroed,
	// otherwise each colour is (%f150 * dst^2.2 + inAlphaGain * src) / %f189.
	// Note the component pairing: %f197 uses dst %f185 / src %f139, %f200 uses
	// %f175 / %f138, %f203 uses %f165 / %f137.
	cvt.ftz.sat.f32.f32 	%f189, %f151;
	mov.f32 	%f190, %f189;
	mov.f32 	%f191, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f192, %f189, %f191;
	mov.f32 	%f193, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p14, %f192, %f193;
	@%p14 bra 	$Lt_128_274178;
	.loc	5	213	0
	rcp.approx.ftz.f32 	%f194, %f189;
	mul.ftz.f32 	%f195, %f145, %f139;
	fma.rn.ftz.f32 	%f196, %f150, %f185, %f195;
	mul.ftz.f32 	%f197, %f194, %f196;
	.loc	5	214	0
	mul.ftz.f32 	%f198, %f145, %f138;
	fma.rn.ftz.f32 	%f199, %f150, %f175, %f198;
	mul.ftz.f32 	%f200, %f194, %f199;
	.loc	5	215	0
	mul.ftz.f32 	%f201, %f145, %f137;
	fma.rn.ftz.f32 	%f202, %f150, %f165, %f201;
	mul.ftz.f32 	%f203, %f194, %f202;
	bra.uni 	$Lt_128_273922;
$Lt_128_274178:
	.loc	5	219	0
	mov.f32 	%f197, 0f00000000;   	// 0
	mov.f32 	%f200, 0f00000000;   	// 0
	mov.f32 	%f203, 0f00000000;   	// 0
	mov.f32 	%f190, 0f00000000;   	// 0
$Lt_128_273922:
	.loc	5	266	0
	// Raise each blended colour to the power 0.454545 (about 1/2.2), again through
	// lg2/ex2 on the magnitude with the sign restored. Results: %f210, %f220, %f230.
	mov.f32 	%f204, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p15, %f203, %f204;
	@!%p15 bra 	$Lt_128_274434;
	.loc	5	242	0
	neg.ftz.f32 	%f205, %f203;
	lg2.approx.ftz.f32 	%f206, %f205;
	mov.f32 	%f207, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f208, %f206, %f207;
	ex2.approx.ftz.f32 	%f209, %f208;
	neg.ftz.f32 	%f210, %f209;
	bra.uni 	$LDWendi___log2f_305_68;
$Lt_128_274434:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f211, %f203;
	mov.f32 	%f212, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f213, %f211, %f212;
	ex2.approx.ftz.f32 	%f210, %f213;
$LDWendi___log2f_305_68:
	.loc	5	267	0
	mov.f32 	%f214, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p16, %f200, %f214;
	@!%p16 bra 	$Lt_128_274946;
	.loc	5	242	0
	neg.ftz.f32 	%f215, %f200;
	lg2.approx.ftz.f32 	%f216, %f215;
	mov.f32 	%f217, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f218, %f216, %f217;
	ex2.approx.ftz.f32 	%f219, %f218;
	neg.ftz.f32 	%f220, %f219;
	bra.uni 	$LDWendi___log2f_305_66;
$Lt_128_274946:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f221, %f200;
	mov.f32 	%f222, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f223, %f221, %f222;
	ex2.approx.ftz.f32 	%f220, %f223;
$LDWendi___log2f_305_66:
	.loc	5	268	0
	mov.f32 	%f224, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p17, %f197, %f224;
	@!%p17 bra 	$Lt_128_275458;
	.loc	5	242	0
	neg.ftz.f32 	%f225, %f197;
	lg2.approx.ftz.f32 	%f226, %f225;
	mov.f32 	%f227, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f228, %f226, %f227;
	ex2.approx.ftz.f32 	%f229, %f228;
	neg.ftz.f32 	%f230, %f229;
	bra.uni 	$LDWendi___log2f_305_64;
$Lt_128_275458:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f231, %f197;
	mov.f32 	%f232, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f233, %f231, %f232;
	ex2.approx.ftz.f32 	%f230, %f233;
$LDWendi___log2f_305_64:
	.loc	22	101	0
	mov.f32 	%f158, %f210;
	mov.f32 	%f157, %f220;
	mov.f32 	%f156, %f230;
	mov.f32 	%f155, %f190;
$Lt_128_271874:
	.loc	6	170	0
	// Publish the result as the working pixel and jump to the common continuation.
	mov.f32 	%f137, %f158;
	mov.f32 	%f138, %f157;
	mov.f32 	%f139, %f156;
	mov.f32 	%f140, %f155;
	bra.uni 	$Lt_128_315394;
// Path for every blend mode other than 18: prepare the source pixel in place.
// If srcA (%f140) <= 8e-6 the whole source pixel is zeroed. Otherwise each colour
// component is multiplied by rcp(srcA) and raised to the power 0.454545 (about 1/2.2)
// via ex2(k * lg2(|c|)) with the sign restored; %f140 itself is left unchanged.
$Lt_128_271618:
	mov.f32 	%f234, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f235, %f140, %f234;
	mov.f32 	%f236, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p18, %f235, %f236;
	@!%p18 bra 	$Lt_128_276226;
	mov.f32 	%f140, 0f00000000;   	// 0
	mov.f32 	%f139, 0f00000000;   	// 0
	mov.f32 	%f138, 0f00000000;   	// 0
	mov.f32 	%f137, 0f00000000;   	// 0
	bra.uni 	$Lt_128_275970;
$Lt_128_276226:
	.loc	6	183	0
	// %f237 = approximate 1 / srcA, shared by all three components.
	rcp.approx.ftz.f32 	%f237, %f140;
	mul.ftz.f32 	%f238, %f237, %f137;
	mov.f32 	%f239, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p19, %f238, %f239;
	@!%p19 bra 	$Lt_128_276482;
	.loc	5	242	0
	neg.ftz.f32 	%f240, %f238;
	lg2.approx.ftz.f32 	%f241, %f240;
	mov.f32 	%f242, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f243, %f241, %f242;
	ex2.approx.ftz.f32 	%f244, %f243;
	neg.ftz.f32 	%f245, %f244;
	bra.uni 	$LDWendi___log2f_305_62;
$Lt_128_276482:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f246, %f238;
	mov.f32 	%f247, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f248, %f246, %f247;
	ex2.approx.ftz.f32 	%f245, %f248;
$LDWendi___log2f_305_62:
	.loc	6	183	0
	mov.f32 	%f137, %f245;
	.loc	6	184	0
	mul.ftz.f32 	%f249, %f237, %f138;
	mov.f32 	%f250, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p20, %f249, %f250;
	@!%p20 bra 	$Lt_128_276994;
	.loc	5	242	0
	neg.ftz.f32 	%f251, %f249;
	lg2.approx.ftz.f32 	%f252, %f251;
	mov.f32 	%f253, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f254, %f252, %f253;
	ex2.approx.ftz.f32 	%f255, %f254;
	neg.ftz.f32 	%f256, %f255;
	bra.uni 	$LDWendi___log2f_305_60;
$Lt_128_276994:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f257, %f249;
	mov.f32 	%f258, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f259, %f257, %f258;
	ex2.approx.ftz.f32 	%f256, %f259;
$LDWendi___log2f_305_60:
	.loc	6	184	0
	mov.f32 	%f138, %f256;
	.loc	6	185	0
	mul.ftz.f32 	%f260, %f237, %f139;
	mov.f32 	%f261, 0f00000000;   	// 0
	setp.lt.ftz.f32 	%p21, %f260, %f261;
	@!%p21 bra 	$Lt_128_277506;
	.loc	5	242	0
	neg.ftz.f32 	%f262, %f260;
	lg2.approx.ftz.f32 	%f263, %f262;
	mov.f32 	%f264, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f265, %f263, %f264;
	ex2.approx.ftz.f32 	%f266, %f265;
	neg.ftz.f32 	%f267, %f266;
	bra.uni 	$LDWendi___log2f_305_58;
$Lt_128_277506:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f268, %f260;
	mov.f32 	%f269, 0f3ee8ba2e;   	// 0.454545
	mul.ftz.f32 	%f270, %f268, %f269;
	ex2.approx.ftz.f32 	%f267, %f270;
$LDWendi___log2f_305_58:
	.loc	6	185	0
	mov.f32 	%f139, %f267;
$Lt_128_275970:
	// Blend-mode dispatch: %r21 (inBlendMode) is compared against constants in the
	// order 3, 11, 17, 22, 6, 1, 13, 4, 2, 14, 12, 19, 23, 8, 24, 15, 20, 9, 5, 7, 25,
	// 26, 10, 21, 0, 16, and the first match branches to that mode's code. A value
	// matching none of them goes straight to the common continuation $Lt_128_315394,
	// leaving the working pixel %f137..%f140 as it is.
	.loc	6	189	0
	mov.u32 	%r34, 3;
	setp.eq.s32 	%p22, %r21, %r34;
	@%p22 bra 	$Lt_128_258;
	mov.u32 	%r35, 11;
	setp.eq.s32 	%p23, %r21, %r35;
	@%p23 bra 	$Lt_128_770;
	mov.u32 	%r36, 17;
	setp.eq.s32 	%p24, %r21, %r36;
	@%p24 bra 	$Lt_128_1026;
	mov.u32 	%r37, 22;
	setp.eq.s32 	%p25, %r21, %r37;
	@%p25 bra 	$Lt_128_1282;
	mov.u32 	%r38, 6;
	setp.eq.s32 	%p26, %r21, %r38;
	@%p26 bra 	$Lt_128_1538;
	mov.u32 	%r39, 1;
	setp.eq.s32 	%p27, %r21, %r39;
	@%p27 bra 	$Lt_128_1794;
	mov.u32 	%r40, 13;
	setp.eq.s32 	%p28, %r21, %r40;
	@%p28 bra 	$Lt_128_2050;
	mov.u32 	%r41, 4;
	setp.eq.s32 	%p29, %r21, %r41;
	@%p29 bra 	$Lt_128_2306;
	mov.u32 	%r42, 2;
	setp.eq.s32 	%p30, %r21, %r42;
	@%p30 bra 	$Lt_128_2562;
	mov.u32 	%r43, 14;
	setp.eq.s32 	%p31, %r21, %r43;
	@%p31 bra 	$Lt_128_2818;
	mov.u32 	%r44, 12;
	setp.eq.s32 	%p32, %r21, %r44;
	@%p32 bra 	$Lt_128_3074;
	mov.u32 	%r45, 19;
	setp.eq.s32 	%p33, %r21, %r45;
	@%p33 bra 	$Lt_128_3330;
	mov.u32 	%r46, 23;
	setp.eq.s32 	%p34, %r21, %r46;
	@%p34 bra 	$Lt_128_3586;
	mov.u32 	%r47, 8;
	setp.eq.s32 	%p35, %r21, %r47;
	@%p35 bra 	$Lt_128_3842;
	mov.u32 	%r48, 24;
	setp.eq.s32 	%p36, %r21, %r48;
	@%p36 bra 	$Lt_128_4098;
	mov.u32 	%r49, 15;
	setp.eq.s32 	%p37, %r21, %r49;
	@%p37 bra 	$Lt_128_4354;
	mov.u32 	%r50, 20;
	setp.eq.s32 	%p38, %r21, %r50;
	@%p38 bra 	$Lt_128_4610;
	mov.u32 	%r51, 9;
	setp.eq.s32 	%p39, %r21, %r51;
	@%p39 bra 	$Lt_128_4866;
	mov.u32 	%r52, 5;
	setp.eq.s32 	%p40, %r21, %r52;
	@%p40 bra 	$Lt_128_5122;
	mov.u32 	%r53, 7;
	setp.eq.s32 	%p41, %r21, %r53;
	@%p41 bra 	$Lt_128_5378;
	mov.u32 	%r54, 25;
	setp.eq.s32 	%p42, %r21, %r54;
	@%p42 bra 	$Lt_128_5634;
	mov.u32 	%r55, 26;
	setp.eq.s32 	%p43, %r21, %r55;
	@%p43 bra 	$Lt_128_5890;
	mov.u32 	%r56, 10;
	setp.eq.s32 	%p44, %r21, %r56;
	@%p44 bra 	$Lt_128_6146;
	mov.u32 	%r57, 21;
	setp.eq.s32 	%p45, %r21, %r57;
	@%p45 bra 	$Lt_128_6402;
	mov.u32 	%r58, 0;
	setp.eq.s32 	%p46, %r21, %r58;
	@%p46 bra 	$Lt_128_6658;
	mov.u32 	%r59, 16;
	setp.eq.s32 	%p47, %r21, %r59;
	@%p47 bra 	$Lt_128_6914;
	bra.uni 	$Lt_128_315394;
// inBlendMode == 3. Source pixel %f137..%f140, destination pixel %f141..%f144
// (fourth component used as alpha).
//   %f272 = a  = inAlphaGain * srcA
//   %f275 = oa = a + dstA - a * dstA                 (output alpha)
//   oa <= 8e-6  ->  all-zero pixel
//   else per colour: B = min(dst, src)
//        out = dst * (1 - a/oa) + (a/oa) * (src * (1 - dstA) + dstA * B)
// NOTE(review): the per-channel min looks like a "darken"-style blend -- confirm
// against the source enum for mode 3.
$Lt_128_258:
	.loc	22	469	0
	ld.param.f32 	%f271, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f271, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f274, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f274;
	mov.f32 	%f276, %f275;
	mov.f32 	%f277, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f278, %f275, %f277;
	mov.f32 	%f279, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p48, %f278, %f279;
	@!%p48 bra 	$Lt_128_278274;
	mov.f32 	%f280, 0f00000000;   	// 0
	mov.f32 	%f281, 0f00000000;   	// 0
	mov.f32 	%f282, 0f00000000;   	// 0
	mov.f32 	%f276, 0f00000000;   	// 0
	bra.uni 	$Lt_128_278018;
$Lt_128_278274:
	// %f284 = 1 - dstA, %f285 = rcp(oa), %f286 = a/oa, %f289 = 1 - a/oa.
	mov.f32 	%f283, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f283, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f287, 0f3f800000;   	// 1
	mul.ftz.f32 	%f288, %f272, %f285;
	sub.ftz.f32 	%f289, %f287, %f288;
	min.ftz.f32 	%f290, %f141, %f137;
	mul.ftz.f32 	%f291, %f144, %f290;
	fma.rn.ftz.f32 	%f292, %f137, %f284, %f291;
	mul.ftz.f32 	%f293, %f286, %f292;
	fma.rn.ftz.f32 	%f282, %f141, %f289, %f293;
	min.ftz.f32 	%f294, %f142, %f138;
	mul.ftz.f32 	%f295, %f144, %f294;
	fma.rn.ftz.f32 	%f296, %f138, %f284, %f295;
	mul.ftz.f32 	%f297, %f286, %f296;
	fma.rn.ftz.f32 	%f281, %f142, %f289, %f297;
	min.ftz.f32 	%f298, %f143, %f139;
	mul.ftz.f32 	%f299, %f144, %f298;
	fma.rn.ftz.f32 	%f300, %f139, %f284, %f299;
	mul.ftz.f32 	%f301, %f286, %f300;
	fma.rn.ftz.f32 	%f280, %f143, %f289, %f301;
$Lt_128_278018:
	.loc	6	191	0
	// Publish the blended pixel and jump to the common continuation.
	mov.f32 	%f137, %f282;
	mov.f32 	%f138, %f281;
	mov.f32 	%f139, %f280;
	mov.f32 	%f140, %f276;
	bra.uni 	$Lt_128_315394;
// inBlendMode == 11. Source pixel %f137..%f140, destination pixel %f141..%f144
// (fourth component used as alpha).
//   %f272 = a  = inAlphaGain * srcA
//   %f275 = oa = a + dstA - a * dstA                 (output alpha)
//   oa <= 8e-6  ->  all-zero pixel
//   else per colour: B = max(dst, src)
//        out = dst * (1 - a/oa) + (a/oa) * (src * (1 - dstA) + dstA * B)
// NOTE(review): the per-channel max looks like a "lighten"-style blend -- confirm
// against the source enum for mode 11.
$Lt_128_770:
	.loc	22	470	0
	ld.param.f32 	%f302, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f302, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f303, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f303;
	mov.f32 	%f304, %f275;
	mov.f32 	%f305, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f306, %f275, %f305;
	mov.f32 	%f307, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p49, %f306, %f307;
	@!%p49 bra 	$Lt_128_278786;
	mov.f32 	%f308, 0f00000000;   	// 0
	mov.f32 	%f309, 0f00000000;   	// 0
	mov.f32 	%f310, 0f00000000;   	// 0
	mov.f32 	%f304, 0f00000000;   	// 0
	bra.uni 	$Lt_128_278530;
$Lt_128_278786:
	// %f284 = 1 - dstA, %f285 = rcp(oa), %f286 = a/oa, %f289 = 1 - a/oa.
	mov.f32 	%f311, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f311, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f312, 0f3f800000;   	// 1
	mul.ftz.f32 	%f313, %f272, %f285;
	sub.ftz.f32 	%f289, %f312, %f313;
	max.ftz.f32 	%f314, %f141, %f137;
	mul.ftz.f32 	%f315, %f144, %f314;
	fma.rn.ftz.f32 	%f316, %f137, %f284, %f315;
	mul.ftz.f32 	%f317, %f286, %f316;
	fma.rn.ftz.f32 	%f310, %f141, %f289, %f317;
	max.ftz.f32 	%f318, %f142, %f138;
	mul.ftz.f32 	%f319, %f144, %f318;
	fma.rn.ftz.f32 	%f320, %f138, %f284, %f319;
	mul.ftz.f32 	%f321, %f286, %f320;
	fma.rn.ftz.f32 	%f309, %f142, %f289, %f321;
	max.ftz.f32 	%f322, %f143, %f139;
	mul.ftz.f32 	%f323, %f144, %f322;
	fma.rn.ftz.f32 	%f324, %f139, %f284, %f323;
	mul.ftz.f32 	%f325, %f286, %f324;
	fma.rn.ftz.f32 	%f308, %f143, %f289, %f325;
$Lt_128_278530:
	.loc	6	192	0
	// Publish the blended pixel and jump to the common continuation.
	mov.f32 	%f137, %f310;
	mov.f32 	%f138, %f309;
	mov.f32 	%f139, %f308;
	mov.f32 	%f140, %f304;
	bra.uni 	$Lt_128_315394;
// inBlendMode == 17. Source pixel %f137..%f140, destination pixel %f141..%f144
// (fourth component used as alpha).
//   %f272 = a  = inAlphaGain * srcA
//   %f275 = oa = a + dstA - a * dstA                 (output alpha)
//   oa <= 8e-6  ->  all-zero pixel
//   else per colour: B = min(max(dst * src, 0), 1)
//        out = dst * (1 - a/oa) + (a/oa) * (src * (1 - dstA) + dstA * B)
// NOTE(review): the clamped product looks like a "multiply"-style blend -- confirm
// against the source enum for mode 17.
$Lt_128_1026:
	.loc	22	471	0
	ld.param.f32 	%f326, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f326, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f327, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f327;
	mov.f32 	%f328, %f275;
	mov.f32 	%f329, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f330, %f275, %f329;
	mov.f32 	%f331, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p50, %f330, %f331;
	@!%p50 bra 	$Lt_128_279298;
	mov.f32 	%f332, 0f00000000;   	// 0
	mov.f32 	%f333, 0f00000000;   	// 0
	mov.f32 	%f334, 0f00000000;   	// 0
	mov.f32 	%f328, 0f00000000;   	// 0
	bra.uni 	$Lt_128_279042;
$Lt_128_279298:
	// %f284 = 1 - dstA, %f285 = rcp(oa), %f286 = a/oa, %f289 = 1 - a/oa.
	mov.f32 	%f335, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f335, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f336, 0f3f800000;   	// 1
	mul.ftz.f32 	%f337, %f272, %f285;
	sub.ftz.f32 	%f289, %f336, %f337;
	mul.ftz.f32 	%f338, %f141, %f137;
	mov.f32 	%f339, 0f00000000;   	// 0
	max.ftz.f32 	%f340, %f338, %f339;
	mov.f32 	%f341, 0f3f800000;   	// 1
	min.ftz.f32 	%f342, %f340, %f341;
	mul.ftz.f32 	%f343, %f144, %f342;
	fma.rn.ftz.f32 	%f344, %f137, %f284, %f343;
	mul.ftz.f32 	%f345, %f286, %f344;
	fma.rn.ftz.f32 	%f334, %f141, %f289, %f345;
	mul.ftz.f32 	%f346, %f142, %f138;
	mov.f32 	%f347, 0f00000000;   	// 0
	max.ftz.f32 	%f348, %f346, %f347;
	mov.f32 	%f349, 0f3f800000;   	// 1
	min.ftz.f32 	%f350, %f348, %f349;
	mul.ftz.f32 	%f351, %f144, %f350;
	fma.rn.ftz.f32 	%f352, %f138, %f284, %f351;
	mul.ftz.f32 	%f353, %f286, %f352;
	fma.rn.ftz.f32 	%f333, %f142, %f289, %f353;
	mul.ftz.f32 	%f354, %f143, %f139;
	mov.f32 	%f355, 0f00000000;   	// 0
	max.ftz.f32 	%f356, %f354, %f355;
	mov.f32 	%f357, 0f3f800000;   	// 1
	min.ftz.f32 	%f358, %f356, %f357;
	mul.ftz.f32 	%f359, %f144, %f358;
	fma.rn.ftz.f32 	%f360, %f139, %f284, %f359;
	mul.ftz.f32 	%f361, %f286, %f360;
	fma.rn.ftz.f32 	%f332, %f143, %f289, %f361;
$Lt_128_279042:
	.loc	6	193	0
	// Publish the blended pixel and jump to the common continuation.
	mov.f32 	%f137, %f334;
	mov.f32 	%f138, %f333;
	mov.f32 	%f139, %f332;
	mov.f32 	%f140, %f328;
	bra.uni 	$Lt_128_315394;
// inBlendMode == 22. Source pixel %f137..%f140, destination pixel %f141..%f144
// (fourth component used as alpha).
//   %f272 = a  = inAlphaGain * srcA
//   %f275 = oa = a + dstA - a * dstA                 (output alpha)
//   oa <= 8e-6  ->  all-zero pixel
//   else per colour, with d' = min(max(dst, 1e-7), 1) and s' = min(max(src, 1e-7), 1):
//        B   = min(max(1 - (1 - d') * (1 - s'), 0), 1)
//        out = dst * (1 - a/oa) + (a/oa) * (src * (1 - dstA) + dstA * B)
// NOTE(review): 1 - (1-d)(1-s) looks like a "screen"-style blend -- confirm against
// the source enum for mode 22.
$Lt_128_1282:
	.loc	22	472	0
	ld.param.f32 	%f362, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f362, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f363, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f363;
	mov.f32 	%f364, %f275;
	mov.f32 	%f365, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f366, %f275, %f365;
	mov.f32 	%f367, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p51, %f366, %f367;
	@!%p51 bra 	$Lt_128_279810;
	mov.f32 	%f368, 0f00000000;   	// 0
	mov.f32 	%f369, 0f00000000;   	// 0
	mov.f32 	%f370, 0f00000000;   	// 0
	mov.f32 	%f364, 0f00000000;   	// 0
	bra.uni 	$Lt_128_279554;
$Lt_128_279810:
	// %f284 = 1 - dstA, %f285 = rcp(oa), %f286 = a/oa, %f289 = 1 - a/oa.
	mov.f32 	%f371, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f371, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f372, 0f3f800000;   	// 1
	mul.ftz.f32 	%f373, %f272, %f285;
	sub.ftz.f32 	%f289, %f372, %f373;
	// First colour component (dst %f141, src %f137) -> %f370.
	mov.f32 	%f374, 0f3f800000;   	// 1
	mov.f32 	%f375, 0f3f800000;   	// 1
	mov.f32 	%f376, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f377, %f141, %f376;
	mov.f32 	%f378, 0f3f800000;   	// 1
	min.ftz.f32 	%f379, %f377, %f378;
	sub.ftz.f32 	%f380, %f375, %f379;
	mov.f32 	%f381, 0f3f800000;   	// 1
	mov.f32 	%f382, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f383, %f137, %f382;
	mov.f32 	%f384, 0f3f800000;   	// 1
	min.ftz.f32 	%f385, %f383, %f384;
	sub.ftz.f32 	%f386, %f381, %f385;
	mul.ftz.f32 	%f387, %f380, %f386;
	sub.ftz.f32 	%f388, %f374, %f387;
	mov.f32 	%f389, 0f00000000;   	// 0
	max.ftz.f32 	%f390, %f388, %f389;
	mov.f32 	%f391, 0f3f800000;   	// 1
	min.ftz.f32 	%f392, %f390, %f391;
	mul.ftz.f32 	%f393, %f144, %f392;
	fma.rn.ftz.f32 	%f394, %f137, %f284, %f393;
	mul.ftz.f32 	%f395, %f286, %f394;
	fma.rn.ftz.f32 	%f370, %f141, %f289, %f395;
	// Second colour component (dst %f142, src %f138) -> %f369.
	mov.f32 	%f396, 0f3f800000;   	// 1
	mov.f32 	%f397, 0f3f800000;   	// 1
	mov.f32 	%f398, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f399, %f142, %f398;
	mov.f32 	%f400, 0f3f800000;   	// 1
	min.ftz.f32 	%f401, %f399, %f400;
	sub.ftz.f32 	%f402, %f397, %f401;
	mov.f32 	%f403, 0f3f800000;   	// 1
	mov.f32 	%f404, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f405, %f138, %f404;
	mov.f32 	%f406, 0f3f800000;   	// 1
	min.ftz.f32 	%f407, %f405, %f406;
	sub.ftz.f32 	%f408, %f403, %f407;
	mul.ftz.f32 	%f409, %f402, %f408;
	sub.ftz.f32 	%f410, %f396, %f409;
	mov.f32 	%f411, 0f00000000;   	// 0
	max.ftz.f32 	%f412, %f410, %f411;
	mov.f32 	%f413, 0f3f800000;   	// 1
	min.ftz.f32 	%f414, %f412, %f413;
	mul.ftz.f32 	%f415, %f144, %f414;
	fma.rn.ftz.f32 	%f416, %f138, %f284, %f415;
	mul.ftz.f32 	%f417, %f286, %f416;
	fma.rn.ftz.f32 	%f369, %f142, %f289, %f417;
	// Third colour component (dst %f143, src %f139) -> %f368.
	mov.f32 	%f418, 0f3f800000;   	// 1
	mov.f32 	%f419, 0f3f800000;   	// 1
	mov.f32 	%f420, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f421, %f143, %f420;
	mov.f32 	%f422, 0f3f800000;   	// 1
	min.ftz.f32 	%f423, %f421, %f422;
	sub.ftz.f32 	%f424, %f419, %f423;
	mov.f32 	%f425, 0f3f800000;   	// 1
	mov.f32 	%f426, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f427, %f139, %f426;
	mov.f32 	%f428, 0f3f800000;   	// 1
	min.ftz.f32 	%f429, %f427, %f428;
	sub.ftz.f32 	%f430, %f425, %f429;
	mul.ftz.f32 	%f431, %f424, %f430;
	sub.ftz.f32 	%f432, %f418, %f431;
	mov.f32 	%f433, 0f00000000;   	// 0
	max.ftz.f32 	%f434, %f432, %f433;
	mov.f32 	%f435, 0f3f800000;   	// 1
	min.ftz.f32 	%f436, %f434, %f435;
	mul.ftz.f32 	%f437, %f144, %f436;
	fma.rn.ftz.f32 	%f438, %f139, %f284, %f437;
	mul.ftz.f32 	%f439, %f286, %f438;
	fma.rn.ftz.f32 	%f368, %f143, %f289, %f439;
$Lt_128_279554:
	.loc	6	194	0
	// Publish the blended pixel and jump to the common continuation.
	mov.f32 	%f137, %f370;
	mov.f32 	%f138, %f369;
	mov.f32 	%f139, %f368;
	mov.f32 	%f140, %f364;
	bra.uni 	$Lt_128_315394;
// inBlendMode == 6. Picks, per pixel, either the whole source pixel (%f137..%f140)
// or the whole destination pixel (%f141..%f144); nothing is interpolated.
//   %f272 = a = inAlphaGain * srcA
//   a - 8e-6 <= 0   -> destination pixel
//   a + 8e-6 >= 1   -> source pixel
//   otherwise       -> compare trunc(a * 32767) with a 15-bit value derived from (x, y):
//                      threshold < value picks the destination, else the source.
// NOTE(review): a stochastic per-pixel pick like this looks like a "dissolve"-style
// blend -- confirm against the source enum for mode 6.
$Lt_128_1538:
	.loc	22	526	0
	ld.param.f32 	%f440, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f440, %f140;
	mov.f32 	%f441, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f442, %f272, %f441;
	mov.f32 	%f443, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p52, %f442, %f443;
	@!%p52 bra 	$Lt_128_235522;
	.loc	22	528	0
	mov.f32 	%f444, %f141;
	mov.f32 	%f445, %f142;
	mov.f32 	%f446, %f143;
	mov.f32 	%f447, %f144;
	bra.uni 	$LDWendi__Z4Randj_305_56;
$Lt_128_235522:
	.loc	22	530	0
	mov.f32 	%f448, 0f370637bd;   	// 8e-006
	add.ftz.f32 	%f449, %f272, %f448;
	mov.f32 	%f450, 0f3f800000;   	// 1
	setp.ge.ftz.f32 	%p53, %f449, %f450;
	@!%p53 bra 	$Lt_128_235778;
	.loc	22	532	0
	mov.f32 	%f444, %f137;
	mov.f32 	%f445, %f138;
	mov.f32 	%f446, %f139;
	mov.f32 	%f447, %f140;
	bra.uni 	$LDWendi__Z4Randj_305_56;
$Lt_128_235778:
	.loc	21	143	0
	// Integer hash of the pixel position: x (%r5), y (%r11) and the constant 1 are
	// combined through rounds of subtract / shift / xor, ending with the seed in %r96.
	mov.s32 	%r60, 1;
	sub.s32 	%r61, %r60, %r5;
	shr.u32 	%r62, %r11, 13;
	sub.u32 	%r63, %r5, %r11;
	sub.u32 	%r64, %r61, %r11;
	xor.b32 	%r65, %r62, %r64;
	shl.b32 	%r66, %r65, 8;
	sub.u32 	%r67, %r63, %r65;
	sub.u32 	%r68, %r11, %r65;
	xor.b32 	%r69, %r66, %r67;
	shr.u32 	%r70, %r69, 13;
	sub.u32 	%r71, %r68, %r69;
	sub.u32 	%r72, %r65, %r69;
	xor.b32 	%r73, %r70, %r71;
	shr.u32 	%r74, %r73, 12;
	sub.u32 	%r75, %r72, %r73;
	xor.b32 	%r76, %r74, %r75;
	sub.u32 	%r77, %r69, %r73;
	sub.u32 	%r78, %r77, %r76;
	shl.b32 	%r79, %r76, 16;
	xor.b32 	%r80, %r78, %r79;
	.loc	21	144	0
	sub.u32 	%r81, %r73, %r76;
	sub.u32 	%r82, %r81, %r80;
	shr.u32 	%r83, %r80, 5;
	xor.b32 	%r84, %r82, %r83;
	.loc	21	145	0
	sub.u32 	%r85, %r76, %r80;
	sub.u32 	%r86, %r85, %r84;
	shr.u32 	%r87, %r84, 3;
	xor.b32 	%r88, %r86, %r87;
	.loc	21	146	0
	sub.u32 	%r89, %r80, %r84;
	sub.u32 	%r90, %r89, %r88;
	shl.b32 	%r91, %r88, 10;
	xor.b32 	%r92, %r90, %r91;
	.loc	21	147	0
	sub.u32 	%r93, %r84, %r88;
	sub.u32 	%r94, %r93, %r92;
	shr.u32 	%r95, %r92, 15;
	xor.b32 	%r96, %r94, %r95;
	.loc	22	537	0
	// %r97 = trunc(a * 32767).
	// %r99  = seed * 1103515245 + 12345              (one linear-congruential step)
	// %r104 = seed * -1029531031 - 740551042         (equal, mod 2^32, to two such steps)
	// %r107 = (((%r99 >> 16) & 255) << 7) ^ ((%r104 >> 16) & 255)   -> value in 0..32767
	mov.f32 	%f451, 0f46fffe00;   	// 32767
	mul.ftz.f32 	%f452, %f272, %f451;
	cvt.rzi.ftz.s32.f32 	%r97, %f452;
	mul.lo.u32 	%r98, %r96, 1103515245;
	add.u32 	%r99, %r98, 12345;
	shr.u32 	%r100, %r99, 16;
	and.b32 	%r101, %r100, 255;
	shl.b32 	%r102, %r101, 7;
	mul.lo.u32 	%r103, %r96, -1029531031;
	sub.u32 	%r104, %r103, 740551042;
	shr.u32 	%r105, %r104, 16;
	and.b32 	%r106, %r105, 255;
	xor.b32 	%r107, %r102, %r106;
	// Threshold below the generated value: keep the destination; otherwise take the source.
	setp.lt.s32 	%p54, %r97, %r107;
	@%p54 bra 	$Lt_128_280322;
	mov.f32 	%f453, %f137;
	mov.f32 	%f454, %f138;
	mov.f32 	%f455, %f139;
	mov.f32 	%f456, %f140;
	bra.uni 	$Lt_128_280066;
$Lt_128_280322:
	mov.f32 	%f453, %f141;
	mov.f32 	%f454, %f142;
	mov.f32 	%f455, %f143;
	mov.f32 	%f456, %f144;
$Lt_128_280066:
	mov.f32 	%f444, %f453;
	mov.f32 	%f445, %f454;
	mov.f32 	%f446, %f455;
	mov.f32 	%f447, %f456;
$LDWendi__Z4Randj_305_56:
	.loc	6	195	0
	// Publish the chosen pixel and jump to the common continuation.
	mov.f32 	%f137, %f444;
	mov.f32 	%f138, %f445;
	mov.f32 	%f139, %f446;
	mov.f32 	%f140, %f447;
	bra.uni 	$Lt_128_315394;
$Lt_128_1794:
	// Case: composite pixel A onto pixel B using a per-channel blend term.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i:
	//   blend = clamp01(1 - (1 - clamp01(B_i)) / clamp(A_i, 1e-7, 1))
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * blend)
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// NOTE(review): the blend term has the shape of a "color burn" mode; the
	// mode name is inferred, the source at .loc 22:473 is not visible - confirm.
	.loc	22	473	0
	ld.param.f32 	%f457, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f457, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f458, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f458;
	mov.f32 	%f459, %f275;
	mov.f32 	%f460, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f461, %f275, %f460;
	mov.f32 	%f462, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p55, %f461, %f462;
	@!%p55 bra 	$Lt_128_280834;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f463, 0f00000000;   	// 0
	mov.f32 	%f464, 0f00000000;   	// 0
	mov.f32 	%f465, 0f00000000;   	// 0
	mov.f32 	%f459, 0f00000000;   	// 0
	bra.uni 	$Lt_128_280578;
$Lt_128_280834:
	// Shared factors: %f284 = 1 - b, %f285 = approx 1/ao,
	// %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f466, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f466, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f467, 0f3f800000;   	// 1
	mul.ftz.f32 	%f468, %f272, %f285;
	sub.ftz.f32 	%f289, %f467, %f468;
	// Channel 0 (%f137 / %f141) -> %f465.
	mov.f32 	%f469, 0f3f800000;   	// 1
	mov.f32 	%f470, 0f3f800000;   	// 1
	mov.f32 	%f471, 0f00000000;   	// 0
	max.ftz.f32 	%f472, %f141, %f471;
	mov.f32 	%f473, 0f3f800000;   	// 1
	min.ftz.f32 	%f474, %f472, %f473;
	sub.ftz.f32 	%f475, %f470, %f474;
	mov.f32 	%f476, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f477, %f137, %f476;
	mov.f32 	%f478, 0f3f800000;   	// 1
	min.ftz.f32 	%f479, %f477, %f478;
	div.approx.ftz.f32 	%f480, %f475, %f479;
	sub.ftz.f32 	%f481, %f469, %f480;
	mov.f32 	%f482, 0f00000000;   	// 0
	max.ftz.f32 	%f483, %f481, %f482;
	mov.f32 	%f484, 0f3f800000;   	// 1
	min.ftz.f32 	%f485, %f483, %f484;
	mul.ftz.f32 	%f486, %f144, %f485;
	fma.rn.ftz.f32 	%f487, %f137, %f284, %f486;
	mul.ftz.f32 	%f488, %f286, %f487;
	fma.rn.ftz.f32 	%f465, %f141, %f289, %f488;
	// Channel 1 (%f138 / %f142) -> %f464.
	mov.f32 	%f489, 0f3f800000;   	// 1
	mov.f32 	%f490, 0f3f800000;   	// 1
	mov.f32 	%f491, 0f00000000;   	// 0
	max.ftz.f32 	%f492, %f142, %f491;
	mov.f32 	%f493, 0f3f800000;   	// 1
	min.ftz.f32 	%f494, %f492, %f493;
	sub.ftz.f32 	%f495, %f490, %f494;
	mov.f32 	%f496, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f497, %f138, %f496;
	mov.f32 	%f498, 0f3f800000;   	// 1
	min.ftz.f32 	%f499, %f497, %f498;
	div.approx.ftz.f32 	%f500, %f495, %f499;
	sub.ftz.f32 	%f501, %f489, %f500;
	mov.f32 	%f502, 0f00000000;   	// 0
	max.ftz.f32 	%f503, %f501, %f502;
	mov.f32 	%f504, 0f3f800000;   	// 1
	min.ftz.f32 	%f505, %f503, %f504;
	mul.ftz.f32 	%f506, %f144, %f505;
	fma.rn.ftz.f32 	%f507, %f138, %f284, %f506;
	mul.ftz.f32 	%f508, %f286, %f507;
	fma.rn.ftz.f32 	%f464, %f142, %f289, %f508;
	// Channel 2 (%f139 / %f143) -> %f463.
	mov.f32 	%f509, 0f3f800000;   	// 1
	mov.f32 	%f510, 0f3f800000;   	// 1
	mov.f32 	%f511, 0f00000000;   	// 0
	max.ftz.f32 	%f512, %f143, %f511;
	mov.f32 	%f513, 0f3f800000;   	// 1
	min.ftz.f32 	%f514, %f512, %f513;
	sub.ftz.f32 	%f515, %f510, %f514;
	mov.f32 	%f516, 0f33d6bf95;   	// 1e-007
	max.ftz.f32 	%f517, %f139, %f516;
	mov.f32 	%f518, 0f3f800000;   	// 1
	min.ftz.f32 	%f519, %f517, %f518;
	div.approx.ftz.f32 	%f520, %f515, %f519;
	sub.ftz.f32 	%f521, %f509, %f520;
	mov.f32 	%f522, 0f00000000;   	// 0
	max.ftz.f32 	%f523, %f521, %f522;
	mov.f32 	%f524, 0f3f800000;   	// 1
	min.ftz.f32 	%f525, %f523, %f524;
	mul.ftz.f32 	%f526, %f144, %f525;
	fma.rn.ftz.f32 	%f527, %f139, %f284, %f526;
	mul.ftz.f32 	%f528, %f286, %f527;
	fma.rn.ftz.f32 	%f463, %f143, %f289, %f528;
$Lt_128_280578:
	// Commit the composited pixel into A and leave the case.
	.loc	6	196	0
	mov.f32 	%f137, %f465;
	mov.f32 	%f138, %f464;
	mov.f32 	%f139, %f463;
	mov.f32 	%f140, %f459;
	bra.uni 	$Lt_128_315394;
$Lt_128_2050:
	// Case: composite pixel A onto pixel B using a per-channel blend term.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i:
	//   blend = clamp01(clamp01(B_i) + clamp01(A_i) - 1)
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * blend)
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// NOTE(review): the blend term has the shape of a "linear burn" mode; the
	// mode name is inferred, the source at .loc 22:474 is not visible - confirm.
	.loc	22	474	0
	ld.param.f32 	%f529, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f529, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f530, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f530;
	mov.f32 	%f531, %f275;
	mov.f32 	%f532, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f533, %f275, %f532;
	mov.f32 	%f534, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p56, %f533, %f534;
	@!%p56 bra 	$Lt_128_281346;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f535, 0f00000000;   	// 0
	mov.f32 	%f536, 0f00000000;   	// 0
	mov.f32 	%f537, 0f00000000;   	// 0
	mov.f32 	%f531, 0f00000000;   	// 0
	bra.uni 	$Lt_128_281090;
$Lt_128_281346:
	// Shared factors: %f284 = 1 - b, %f285 = approx 1/ao,
	// %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f538, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f538, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f539, 0f3f800000;   	// 1
	mul.ftz.f32 	%f540, %f272, %f285;
	sub.ftz.f32 	%f289, %f539, %f540;
	// Channel 0 (%f137 / %f141) -> %f537.
	mov.f32 	%f541, 0f00000000;   	// 0
	max.ftz.f32 	%f542, %f141, %f541;
	mov.f32 	%f543, 0f3f800000;   	// 1
	min.ftz.f32 	%f544, %f542, %f543;
	mov.f32 	%f545, 0f00000000;   	// 0
	max.ftz.f32 	%f546, %f137, %f545;
	mov.f32 	%f547, 0f3f800000;   	// 1
	min.ftz.f32 	%f548, %f546, %f547;
	add.ftz.f32 	%f549, %f544, %f548;
	mov.f32 	%f550, 0fbf800000;   	// -1
	add.ftz.f32 	%f551, %f549, %f550;
	mov.f32 	%f552, 0f00000000;   	// 0
	max.ftz.f32 	%f553, %f551, %f552;
	mov.f32 	%f554, 0f3f800000;   	// 1
	min.ftz.f32 	%f555, %f553, %f554;
	mul.ftz.f32 	%f556, %f144, %f555;
	fma.rn.ftz.f32 	%f557, %f137, %f284, %f556;
	mul.ftz.f32 	%f558, %f286, %f557;
	fma.rn.ftz.f32 	%f537, %f141, %f289, %f558;
	// Channel 1 (%f138 / %f142) -> %f536.
	mov.f32 	%f559, 0f00000000;   	// 0
	max.ftz.f32 	%f560, %f142, %f559;
	mov.f32 	%f561, 0f3f800000;   	// 1
	min.ftz.f32 	%f562, %f560, %f561;
	mov.f32 	%f563, 0f00000000;   	// 0
	max.ftz.f32 	%f564, %f138, %f563;
	mov.f32 	%f565, 0f3f800000;   	// 1
	min.ftz.f32 	%f566, %f564, %f565;
	add.ftz.f32 	%f567, %f562, %f566;
	mov.f32 	%f568, 0fbf800000;   	// -1
	add.ftz.f32 	%f569, %f567, %f568;
	mov.f32 	%f570, 0f00000000;   	// 0
	max.ftz.f32 	%f571, %f569, %f570;
	mov.f32 	%f572, 0f3f800000;   	// 1
	min.ftz.f32 	%f573, %f571, %f572;
	mul.ftz.f32 	%f574, %f144, %f573;
	fma.rn.ftz.f32 	%f575, %f138, %f284, %f574;
	mul.ftz.f32 	%f576, %f286, %f575;
	fma.rn.ftz.f32 	%f536, %f142, %f289, %f576;
	// Channel 2 (%f139 / %f143) -> %f535.
	mov.f32 	%f577, 0f00000000;   	// 0
	max.ftz.f32 	%f578, %f143, %f577;
	mov.f32 	%f579, 0f3f800000;   	// 1
	min.ftz.f32 	%f580, %f578, %f579;
	mov.f32 	%f581, 0f00000000;   	// 0
	max.ftz.f32 	%f582, %f139, %f581;
	mov.f32 	%f583, 0f3f800000;   	// 1
	min.ftz.f32 	%f584, %f582, %f583;
	add.ftz.f32 	%f585, %f580, %f584;
	mov.f32 	%f586, 0fbf800000;   	// -1
	add.ftz.f32 	%f587, %f585, %f586;
	mov.f32 	%f588, 0f00000000;   	// 0
	max.ftz.f32 	%f589, %f587, %f588;
	mov.f32 	%f590, 0f3f800000;   	// 1
	min.ftz.f32 	%f591, %f589, %f590;
	mul.ftz.f32 	%f592, %f144, %f591;
	fma.rn.ftz.f32 	%f593, %f139, %f284, %f592;
	mul.ftz.f32 	%f594, %f286, %f593;
	fma.rn.ftz.f32 	%f535, %f143, %f289, %f594;
$Lt_128_281090:
	// Commit the composited pixel into A and leave the case.
	.loc	6	197	0
	mov.f32 	%f137, %f537;
	mov.f32 	%f138, %f536;
	mov.f32 	%f139, %f535;
	mov.f32 	%f140, %f531;
	bra.uni 	$Lt_128_315394;
$Lt_128_2306:
	// Case: choose between pixel B and "A composited onto B" by comparing a
	// weighted sum (luma-style) of each pixel's first three components.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a = inAlphaGain * %f140, b = %f144
	//   a - 8e-6 <= 0 -> result = B
	//   b - 8e-6 <= 0 -> result = (A_0, A_1, A_2, a)
	//   otherwise     -> Y(X) = sat(k[+8]*X_0 + k[+0]*X_2 + k[+4]*X_1) with k read
	//                    from kRGB32f_To_601YPbPr when %r12 <= 720 (signed), else
	//                    from kRGB32f_To_709YPbPr.
	//                    If Y(B) > Y(A): composite A onto B (see below),
	//                    else result = B unchanged.
	// Results overwrite %f137..%f140.
	// NOTE(review): %r12 is set outside this excerpt; presumably an image
	// dimension used to pick SD vs HD coefficients - confirm.
	// NOTE(review): this matches a "darker color"-style mode; name inferred.
	.loc	6	198	0
	ld.param.f32 	%f595, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f595, %f140;
	mov.f32 	%f596, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f597, %f272, %f596;
	mov.f32 	%f598, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p57, %f597, %f598;
	@!%p57 bra 	$Lt_128_281858;
	// a is (nearly) zero: result is B.
	.loc	22	608	0
	mov.f32 	%f599, %f141;
	mov.f32 	%f600, %f142;
	mov.f32 	%f601, %f143;
	mov.f32 	%f602, %f144;
	bra.uni 	$Lt_128_282626;
$Lt_128_281858:
	mov.f32 	%f603, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f604, %f144, %f603;
	mov.f32 	%f605, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p58, %f604, %f605;
	@!%p58 bra 	$Lt_128_282370;
	// b is (nearly) zero: result is A's first three components with a as 4th.
	mov.f32 	%f599, %f137;
	mov.f32 	%f600, %f138;
	mov.f32 	%f601, %f139;
	mov.f32 	%f602, %f272;
	bra.uni 	$Lt_128_282626;
$Lt_128_282370:
	// Select the coefficient table: %r12 > 720 takes the 709 branch.
	mov.u32 	%r108, 720;
	setp.gt.s32 	%p59, %r12, %r108;
	@%p59 bra 	$Lt_128_282882;
	// 601 branch: %f612 = Y(B), %f616 = Y(A), both saturated to [0, 1].
	.loc	22	555	0
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f609, %f142, %f608;
	fma.rn.ftz.f32 	%f610, %f607, %f143, %f609;
	fma.rn.ftz.f32 	%f611, %f606, %f141, %f610;
	cvt.ftz.sat.f32.f32 	%f612, %f611;
	mul.ftz.f32 	%f613, %f608, %f138;
	fma.rn.ftz.f32 	%f614, %f607, %f139, %f613;
	fma.rn.ftz.f32 	%f615, %f606, %f137, %f614;
	cvt.ftz.sat.f32.f32 	%f616, %f615;
	setp.gt.ftz.f32 	%p60, %f612, %f616;
	@!%p60 bra 	$Lt_128_237570;
	// Y(B) > Y(A): composite A onto B.
	//   ao = a + b - a*b; ao - 8e-6 <= 0 gives an all-zero pixel, otherwise
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * b + (1 - b) * A_i), 4th = ao.
	.loc	22	468	0
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f617, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f617;
	mov.f32 	%f618, %f275;
	mov.f32 	%f619, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f620, %f275, %f619;
	mov.f32 	%f621, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p61, %f620, %f621;
	@!%p61 bra 	$Lt_128_283394;
	mov.f32 	%f622, 0f00000000;   	// 0
	mov.f32 	%f623, 0f00000000;   	// 0
	mov.f32 	%f624, 0f00000000;   	// 0
	mov.f32 	%f618, 0f00000000;   	// 0
	bra.uni 	$Lt_128_283138;
$Lt_128_283394:
	// %f284 = 1 - b, %f285 = approx 1/ao, %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f625, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f625, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f626, 0f3f800000;   	// 1
	mul.ftz.f32 	%f627, %f272, %f285;
	sub.ftz.f32 	%f289, %f626, %f627;
	mul.ftz.f32 	%f628, %f284, %f137;
	fma.rn.ftz.f32 	%f629, %f137, %f144, %f628;
	mul.ftz.f32 	%f630, %f286, %f629;
	fma.rn.ftz.f32 	%f624, %f141, %f289, %f630;
	mul.ftz.f32 	%f631, %f284, %f138;
	fma.rn.ftz.f32 	%f632, %f138, %f144, %f631;
	mul.ftz.f32 	%f633, %f286, %f632;
	fma.rn.ftz.f32 	%f623, %f142, %f289, %f633;
	mul.ftz.f32 	%f634, %f284, %f139;
	fma.rn.ftz.f32 	%f635, %f139, %f144, %f634;
	mul.ftz.f32 	%f636, %f286, %f635;
	fma.rn.ftz.f32 	%f622, %f143, %f289, %f636;
$Lt_128_283138:
	.loc	22	557	0
	mov.f32 	%f637, %f624;
	mov.f32 	%f638, %f623;
	mov.f32 	%f639, %f622;
	mov.f32 	%f640, %f618;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_305_54;
$Lt_128_237570:
	// Y(B) <= Y(A): keep B unchanged.
	.loc	22	561	0
	mov.f32 	%f637, %f141;
	mov.f32 	%f638, %f142;
	mov.f32 	%f639, %f143;
	mov.f32 	%f640, %f144;
$LDWendi__Z10GetLuma6018PixelRGB_305_54:
	.loc	22	608	0
	mov.f32 	%f599, %f637;
	mov.f32 	%f600, %f638;
	mov.f32 	%f601, %f639;
	mov.f32 	%f602, %f640;
	bra.uni 	$Lt_128_282626;
$Lt_128_282882:
	// 709 branch: same computation as the 601 branch with the other table.
	// %f647 = Y(B), %f651 = Y(A).
	.loc	22	569	0
	ld.const.f32 	%f641, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f642, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f643, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f644, %f142, %f643;
	fma.rn.ftz.f32 	%f645, %f642, %f143, %f644;
	fma.rn.ftz.f32 	%f646, %f641, %f141, %f645;
	cvt.ftz.sat.f32.f32 	%f647, %f646;
	mul.ftz.f32 	%f648, %f643, %f138;
	fma.rn.ftz.f32 	%f649, %f642, %f139, %f648;
	fma.rn.ftz.f32 	%f650, %f641, %f137, %f649;
	cvt.ftz.sat.f32.f32 	%f651, %f650;
	setp.gt.ftz.f32 	%p62, %f647, %f651;
	@!%p62 bra 	$Lt_128_238082;
	// Y(B) > Y(A): composite A onto B (same formula as in the 601 branch).
	.loc	22	468	0
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f652, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f652;
	mov.f32 	%f653, %f275;
	mov.f32 	%f654, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f655, %f275, %f654;
	mov.f32 	%f656, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p63, %f655, %f656;
	@!%p63 bra 	$Lt_128_283906;
	mov.f32 	%f657, 0f00000000;   	// 0
	mov.f32 	%f658, 0f00000000;   	// 0
	mov.f32 	%f659, 0f00000000;   	// 0
	mov.f32 	%f653, 0f00000000;   	// 0
	bra.uni 	$Lt_128_283650;
$Lt_128_283906:
	mov.f32 	%f660, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f660, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f661, 0f3f800000;   	// 1
	mul.ftz.f32 	%f662, %f272, %f285;
	sub.ftz.f32 	%f289, %f661, %f662;
	mul.ftz.f32 	%f663, %f284, %f137;
	fma.rn.ftz.f32 	%f664, %f137, %f144, %f663;
	mul.ftz.f32 	%f665, %f286, %f664;
	fma.rn.ftz.f32 	%f659, %f141, %f289, %f665;
	mul.ftz.f32 	%f666, %f284, %f138;
	fma.rn.ftz.f32 	%f667, %f138, %f144, %f666;
	mul.ftz.f32 	%f668, %f286, %f667;
	fma.rn.ftz.f32 	%f658, %f142, %f289, %f668;
	mul.ftz.f32 	%f669, %f284, %f139;
	fma.rn.ftz.f32 	%f670, %f139, %f144, %f669;
	mul.ftz.f32 	%f671, %f286, %f670;
	fma.rn.ftz.f32 	%f657, %f143, %f289, %f671;
$Lt_128_283650:
	.loc	22	571	0
	mov.f32 	%f672, %f659;
	mov.f32 	%f673, %f658;
	mov.f32 	%f674, %f657;
	mov.f32 	%f675, %f653;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_305_52;
$Lt_128_238082:
	// Y(B) <= Y(A): keep B unchanged.
	.loc	22	575	0
	mov.f32 	%f672, %f141;
	mov.f32 	%f673, %f142;
	mov.f32 	%f674, %f143;
	mov.f32 	%f675, %f144;
$LDWendi__Z10GetLuma7098PixelRGB_305_52:
	.loc	22	608	0
	mov.f32 	%f599, %f672;
	mov.f32 	%f600, %f673;
	mov.f32 	%f601, %f674;
	mov.f32 	%f602, %f675;
$Lt_128_282626:
$Lt_128_282114:
$Lt_128_281602:
	// Join point for every path above: commit the result into A.
	.loc	6	198	0
	mov.f32 	%f137, %f599;
	mov.f32 	%f138, %f600;
	mov.f32 	%f139, %f601;
	mov.f32 	%f140, %f602;
	bra.uni 	$Lt_128_315394;
$Lt_128_2562:
	// Case: composite pixel A onto pixel B using a per-channel blend term.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i:
	//   blend = clamp01(clamp01(B_i) / (1 - clamp(A_i, 0, 0.99999)))
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * blend)
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// The 0.99999 upper clamp keeps the divisor strictly positive.
	// NOTE(review): the blend term has the shape of a "color dodge" mode; the
	// mode name is inferred, the source at .loc 22:475 is not visible - confirm.
	.loc	22	475	0
	ld.param.f32 	%f676, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f676, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f677, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f677;
	mov.f32 	%f678, %f275;
	mov.f32 	%f679, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f680, %f275, %f679;
	mov.f32 	%f681, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p64, %f680, %f681;
	@!%p64 bra 	$Lt_128_284418;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f682, 0f00000000;   	// 0
	mov.f32 	%f683, 0f00000000;   	// 0
	mov.f32 	%f684, 0f00000000;   	// 0
	mov.f32 	%f678, 0f00000000;   	// 0
	bra.uni 	$Lt_128_284162;
$Lt_128_284418:
	// Shared factors: %f284 = 1 - b, %f285 = approx 1/ao,
	// %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f685, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f685, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f686, 0f3f800000;   	// 1
	mul.ftz.f32 	%f687, %f272, %f285;
	sub.ftz.f32 	%f289, %f686, %f687;
	// Channel 0 (%f137 / %f141) -> %f684.
	mov.f32 	%f688, 0f00000000;   	// 0
	max.ftz.f32 	%f689, %f141, %f688;
	mov.f32 	%f690, 0f3f800000;   	// 1
	min.ftz.f32 	%f691, %f689, %f690;
	mov.f32 	%f692, 0f3f800000;   	// 1
	mov.f32 	%f693, 0f00000000;   	// 0
	max.ftz.f32 	%f694, %f137, %f693;
	mov.f32 	%f695, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f696, %f694, %f695;
	sub.ftz.f32 	%f697, %f692, %f696;
	div.approx.ftz.f32 	%f698, %f691, %f697;
	mov.f32 	%f699, 0f00000000;   	// 0
	max.ftz.f32 	%f700, %f698, %f699;
	mov.f32 	%f701, 0f3f800000;   	// 1
	min.ftz.f32 	%f702, %f700, %f701;
	mul.ftz.f32 	%f703, %f144, %f702;
	fma.rn.ftz.f32 	%f704, %f137, %f284, %f703;
	mul.ftz.f32 	%f705, %f286, %f704;
	fma.rn.ftz.f32 	%f684, %f141, %f289, %f705;
	// Channel 1 (%f138 / %f142) -> %f683.
	mov.f32 	%f706, 0f00000000;   	// 0
	max.ftz.f32 	%f707, %f142, %f706;
	mov.f32 	%f708, 0f3f800000;   	// 1
	min.ftz.f32 	%f709, %f707, %f708;
	mov.f32 	%f710, 0f3f800000;   	// 1
	mov.f32 	%f711, 0f00000000;   	// 0
	max.ftz.f32 	%f712, %f138, %f711;
	mov.f32 	%f713, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f714, %f712, %f713;
	sub.ftz.f32 	%f715, %f710, %f714;
	div.approx.ftz.f32 	%f716, %f709, %f715;
	mov.f32 	%f717, 0f00000000;   	// 0
	max.ftz.f32 	%f718, %f716, %f717;
	mov.f32 	%f719, 0f3f800000;   	// 1
	min.ftz.f32 	%f720, %f718, %f719;
	mul.ftz.f32 	%f721, %f144, %f720;
	fma.rn.ftz.f32 	%f722, %f138, %f284, %f721;
	mul.ftz.f32 	%f723, %f286, %f722;
	fma.rn.ftz.f32 	%f683, %f142, %f289, %f723;
	// Channel 2 (%f139 / %f143) -> %f682.
	mov.f32 	%f724, 0f00000000;   	// 0
	max.ftz.f32 	%f725, %f143, %f724;
	mov.f32 	%f726, 0f3f800000;   	// 1
	min.ftz.f32 	%f727, %f725, %f726;
	mov.f32 	%f728, 0f3f800000;   	// 1
	mov.f32 	%f729, 0f00000000;   	// 0
	max.ftz.f32 	%f730, %f139, %f729;
	mov.f32 	%f731, 0f3f7fff58;   	// 0.99999
	min.ftz.f32 	%f732, %f730, %f731;
	sub.ftz.f32 	%f733, %f728, %f732;
	div.approx.ftz.f32 	%f734, %f727, %f733;
	mov.f32 	%f735, 0f00000000;   	// 0
	max.ftz.f32 	%f736, %f734, %f735;
	mov.f32 	%f737, 0f3f800000;   	// 1
	min.ftz.f32 	%f738, %f736, %f737;
	mul.ftz.f32 	%f739, %f144, %f738;
	fma.rn.ftz.f32 	%f740, %f139, %f284, %f739;
	mul.ftz.f32 	%f741, %f286, %f740;
	fma.rn.ftz.f32 	%f682, %f143, %f289, %f741;
$Lt_128_284162:
	// Commit the composited pixel into A and leave the case.
	.loc	6	199	0
	mov.f32 	%f137, %f684;
	mov.f32 	%f138, %f683;
	mov.f32 	%f139, %f682;
	mov.f32 	%f140, %f678;
	bra.uni 	$Lt_128_315394;
$Lt_128_2818:
	// Case: composite pixel A onto pixel B using a per-channel blend term.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i:
	//   blend = clamp01(B_i + A_i)
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * blend)
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// NOTE(review): the blend term has the shape of an additive / "linear
	// dodge" mode; name inferred, source at .loc 22:476 not visible - confirm.
	.loc	22	476	0
	ld.param.f32 	%f742, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f742, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f743, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f743;
	mov.f32 	%f744, %f275;
	mov.f32 	%f745, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f746, %f275, %f745;
	mov.f32 	%f747, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p65, %f746, %f747;
	@!%p65 bra 	$Lt_128_284930;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f748, 0f00000000;   	// 0
	mov.f32 	%f749, 0f00000000;   	// 0
	mov.f32 	%f750, 0f00000000;   	// 0
	mov.f32 	%f744, 0f00000000;   	// 0
	bra.uni 	$Lt_128_284674;
$Lt_128_284930:
	// Shared factors: %f284 = 1 - b, %f285 = approx 1/ao,
	// %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f751, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f751, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f752, 0f3f800000;   	// 1
	mul.ftz.f32 	%f753, %f272, %f285;
	sub.ftz.f32 	%f289, %f752, %f753;
	// Channel 0 (%f137 / %f141) -> %f750.
	add.ftz.f32 	%f754, %f141, %f137;
	mov.f32 	%f755, 0f00000000;   	// 0
	max.ftz.f32 	%f756, %f754, %f755;
	mov.f32 	%f757, 0f3f800000;   	// 1
	min.ftz.f32 	%f758, %f756, %f757;
	mul.ftz.f32 	%f759, %f144, %f758;
	fma.rn.ftz.f32 	%f760, %f137, %f284, %f759;
	mul.ftz.f32 	%f761, %f286, %f760;
	fma.rn.ftz.f32 	%f750, %f141, %f289, %f761;
	// Channel 1 (%f138 / %f142) -> %f749.
	add.ftz.f32 	%f762, %f142, %f138;
	mov.f32 	%f763, 0f00000000;   	// 0
	max.ftz.f32 	%f764, %f762, %f763;
	mov.f32 	%f765, 0f3f800000;   	// 1
	min.ftz.f32 	%f766, %f764, %f765;
	mul.ftz.f32 	%f767, %f144, %f766;
	fma.rn.ftz.f32 	%f768, %f138, %f284, %f767;
	mul.ftz.f32 	%f769, %f286, %f768;
	fma.rn.ftz.f32 	%f749, %f142, %f289, %f769;
	// Channel 2 (%f139 / %f143) -> %f748.
	add.ftz.f32 	%f770, %f143, %f139;
	mov.f32 	%f771, 0f00000000;   	// 0
	max.ftz.f32 	%f772, %f770, %f771;
	mov.f32 	%f773, 0f3f800000;   	// 1
	min.ftz.f32 	%f774, %f772, %f773;
	mul.ftz.f32 	%f775, %f144, %f774;
	fma.rn.ftz.f32 	%f776, %f139, %f284, %f775;
	mul.ftz.f32 	%f777, %f286, %f776;
	fma.rn.ftz.f32 	%f748, %f143, %f289, %f777;
$Lt_128_284674:
	// Commit the composited pixel into A and leave the case.
	.loc	6	200	0
	mov.f32 	%f137, %f750;
	mov.f32 	%f138, %f749;
	mov.f32 	%f139, %f748;
	mov.f32 	%f140, %f744;
	bra.uni 	$Lt_128_315394;
$Lt_128_3074:
	// Case: choose between pixel B and "A composited onto B" by comparing a
	// weighted sum (luma-style) of each pixel's first three components.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a = inAlphaGain * %f140, b = %f144
	//   a - 8e-6 <= 0 -> result = B
	//   b - 8e-6 <= 0 -> result = (A_0, A_1, A_2, a)
	//   otherwise     -> Y(X) = sat(k[+8]*X_0 + k[+0]*X_2 + k[+4]*X_1) with k read
	//                    from kRGB32f_To_601YPbPr when %r12 <= 720 (signed), else
	//                    from kRGB32f_To_709YPbPr.
	//                    If Y(B) < Y(A): composite A onto B (see below),
	//                    else result = B unchanged.
	// Results overwrite %f137..%f140.
	// NOTE(review): %r12 is set outside this excerpt; presumably an image
	// dimension used to pick SD vs HD coefficients - confirm.
	// NOTE(review): this matches a "lighter color"-style mode; name inferred.
	.loc	6	201	0
	ld.param.f32 	%f778, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f778, %f140;
	mov.f32 	%f779, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f780, %f272, %f779;
	mov.f32 	%f781, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p66, %f780, %f781;
	@!%p66 bra 	$Lt_128_285442;
	// a is (nearly) zero: result is B.
	.loc	22	609	0
	mov.f32 	%f782, %f141;
	mov.f32 	%f783, %f142;
	mov.f32 	%f784, %f143;
	mov.f32 	%f785, %f144;
	bra.uni 	$Lt_128_286210;
$Lt_128_285442:
	mov.f32 	%f786, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f787, %f144, %f786;
	mov.f32 	%f788, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p67, %f787, %f788;
	@!%p67 bra 	$Lt_128_285954;
	// b is (nearly) zero: result is A's first three components with a as 4th.
	mov.f32 	%f782, %f137;
	mov.f32 	%f783, %f138;
	mov.f32 	%f784, %f139;
	mov.f32 	%f785, %f272;
	bra.uni 	$Lt_128_286210;
$Lt_128_285954:
	// Select the coefficient table: %r12 > 720 takes the 709 branch.
	mov.u32 	%r109, 720;
	setp.gt.s32 	%p68, %r12, %r109;
	@%p68 bra 	$Lt_128_286466;
	// 601 branch: %f792 = Y(B), %f796 = Y(A), both saturated to [0, 1].
	.loc	22	584	0
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f789, %f142, %f608;
	fma.rn.ftz.f32 	%f790, %f607, %f143, %f789;
	fma.rn.ftz.f32 	%f791, %f606, %f141, %f790;
	cvt.ftz.sat.f32.f32 	%f792, %f791;
	mul.ftz.f32 	%f793, %f608, %f138;
	fma.rn.ftz.f32 	%f794, %f607, %f139, %f793;
	fma.rn.ftz.f32 	%f795, %f606, %f137, %f794;
	cvt.ftz.sat.f32.f32 	%f796, %f795;
	setp.lt.ftz.f32 	%p69, %f792, %f796;
	@!%p69 bra 	$Lt_128_239874;
	// Y(B) < Y(A): composite A onto B.
	//   ao = a + b - a*b; ao - 8e-6 <= 0 gives an all-zero pixel, otherwise
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * b + (1 - b) * A_i), 4th = ao.
	.loc	22	468	0
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f797, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f797;
	mov.f32 	%f798, %f275;
	mov.f32 	%f799, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f800, %f275, %f799;
	mov.f32 	%f801, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p70, %f800, %f801;
	@!%p70 bra 	$Lt_128_286978;
	mov.f32 	%f802, 0f00000000;   	// 0
	mov.f32 	%f803, 0f00000000;   	// 0
	mov.f32 	%f804, 0f00000000;   	// 0
	mov.f32 	%f798, 0f00000000;   	// 0
	bra.uni 	$Lt_128_286722;
$Lt_128_286978:
	// %f284 = 1 - b, %f285 = approx 1/ao, %f286 = a/ao, %f289 = 1 - a/ao.
	mov.f32 	%f805, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f805, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f806, 0f3f800000;   	// 1
	mul.ftz.f32 	%f807, %f272, %f285;
	sub.ftz.f32 	%f289, %f806, %f807;
	mul.ftz.f32 	%f808, %f284, %f137;
	fma.rn.ftz.f32 	%f809, %f137, %f144, %f808;
	mul.ftz.f32 	%f810, %f286, %f809;
	fma.rn.ftz.f32 	%f804, %f141, %f289, %f810;
	mul.ftz.f32 	%f811, %f284, %f138;
	fma.rn.ftz.f32 	%f812, %f138, %f144, %f811;
	mul.ftz.f32 	%f813, %f286, %f812;
	fma.rn.ftz.f32 	%f803, %f142, %f289, %f813;
	mul.ftz.f32 	%f814, %f284, %f139;
	fma.rn.ftz.f32 	%f815, %f139, %f144, %f814;
	mul.ftz.f32 	%f816, %f286, %f815;
	fma.rn.ftz.f32 	%f802, %f143, %f289, %f816;
$Lt_128_286722:
	.loc	22	586	0
	mov.f32 	%f817, %f804;
	mov.f32 	%f818, %f803;
	mov.f32 	%f819, %f802;
	mov.f32 	%f820, %f798;
	bra.uni 	$LDWendi__Z10GetLuma6018PixelRGB_305_50;
$Lt_128_239874:
	// Y(B) >= Y(A): keep B unchanged.
	.loc	22	590	0
	mov.f32 	%f817, %f141;
	mov.f32 	%f818, %f142;
	mov.f32 	%f819, %f143;
	mov.f32 	%f820, %f144;
$LDWendi__Z10GetLuma6018PixelRGB_305_50:
	.loc	22	609	0
	mov.f32 	%f782, %f817;
	mov.f32 	%f783, %f818;
	mov.f32 	%f784, %f819;
	mov.f32 	%f785, %f820;
	bra.uni 	$Lt_128_286210;
$Lt_128_286466:
	// 709 branch: same computation as the 601 branch with the other table.
	// %f827 = Y(B), %f831 = Y(A).
	.loc	22	598	0
	ld.const.f32 	%f821, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%f822, [kRGB32f_To_709YPbPr+0];
	ld.const.f32 	%f823, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f824, %f142, %f823;
	fma.rn.ftz.f32 	%f825, %f822, %f143, %f824;
	fma.rn.ftz.f32 	%f826, %f821, %f141, %f825;
	cvt.ftz.sat.f32.f32 	%f827, %f826;
	mul.ftz.f32 	%f828, %f823, %f138;
	fma.rn.ftz.f32 	%f829, %f822, %f139, %f828;
	fma.rn.ftz.f32 	%f830, %f821, %f137, %f829;
	cvt.ftz.sat.f32.f32 	%f831, %f830;
	setp.lt.ftz.f32 	%p71, %f827, %f831;
	@!%p71 bra 	$Lt_128_240386;
	// Y(B) < Y(A): composite A onto B (same formula as in the 601 branch).
	.loc	22	468	0
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f832, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f832;
	mov.f32 	%f833, %f275;
	mov.f32 	%f834, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f835, %f275, %f834;
	mov.f32 	%f836, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p72, %f835, %f836;
	@!%p72 bra 	$Lt_128_287490;
	mov.f32 	%f837, 0f00000000;   	// 0
	mov.f32 	%f838, 0f00000000;   	// 0
	mov.f32 	%f839, 0f00000000;   	// 0
	mov.f32 	%f833, 0f00000000;   	// 0
	bra.uni 	$Lt_128_287234;
$Lt_128_287490:
	mov.f32 	%f840, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f840, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f841, 0f3f800000;   	// 1
	mul.ftz.f32 	%f842, %f272, %f285;
	sub.ftz.f32 	%f289, %f841, %f842;
	mul.ftz.f32 	%f843, %f284, %f137;
	fma.rn.ftz.f32 	%f844, %f137, %f144, %f843;
	mul.ftz.f32 	%f845, %f286, %f844;
	fma.rn.ftz.f32 	%f839, %f141, %f289, %f845;
	mul.ftz.f32 	%f846, %f284, %f138;
	fma.rn.ftz.f32 	%f847, %f138, %f144, %f846;
	mul.ftz.f32 	%f848, %f286, %f847;
	fma.rn.ftz.f32 	%f838, %f142, %f289, %f848;
	mul.ftz.f32 	%f849, %f284, %f139;
	fma.rn.ftz.f32 	%f850, %f139, %f144, %f849;
	mul.ftz.f32 	%f851, %f286, %f850;
	fma.rn.ftz.f32 	%f837, %f143, %f289, %f851;
$Lt_128_287234:
	.loc	22	600	0
	mov.f32 	%f852, %f839;
	mov.f32 	%f853, %f838;
	mov.f32 	%f854, %f837;
	mov.f32 	%f855, %f833;
	bra.uni 	$LDWendi__Z10GetLuma7098PixelRGB_305_48;
$Lt_128_240386:
	// Y(B) >= Y(A): keep B unchanged.
	.loc	22	604	0
	mov.f32 	%f852, %f141;
	mov.f32 	%f853, %f142;
	mov.f32 	%f854, %f143;
	mov.f32 	%f855, %f144;
$LDWendi__Z10GetLuma7098PixelRGB_305_48:
	.loc	22	609	0
	mov.f32 	%f782, %f852;
	mov.f32 	%f783, %f853;
	mov.f32 	%f784, %f854;
	mov.f32 	%f785, %f855;
$Lt_128_286210:
$Lt_128_285698:
$Lt_128_285186:
	// Join point for every path above: commit the result into A.
	.loc	6	201	0
	mov.f32 	%f137, %f782;
	mov.f32 	%f138, %f783;
	mov.f32 	%f139, %f784;
	mov.f32 	%f140, %f785;
	bra.uni 	$Lt_128_315394;
$Lt_128_3330:
	// Case: composite pixel A onto pixel B using a piecewise per-channel blend.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i,
	// with cb = clamp01(B_i) and cs = clamp01(A_i):
	//   blend = cb * (2*cs)                     if cb <= 0.5
	//         = 1 - (2*(1 - cs)) * (1 - cb)     otherwise
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * clamp01(blend))
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// NOTE(review): a multiply/screen split keyed on the B channel has the
	// shape of an "overlay" mode; name inferred, source at .loc 22:477 / 22:373
	// not visible - confirm.
	.loc	22	477	0
	ld.param.f32 	%f856, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f856, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f857, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f857;
	mov.f32 	%f858, %f275;
	mov.f32 	%f859, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f860, %f275, %f859;
	mov.f32 	%f861, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p73, %f860, %f861;
	@!%p73 bra 	$Lt_128_288002;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f862, 0f00000000;   	// 0
	mov.f32 	%f863, 0f00000000;   	// 0
	mov.f32 	%f864, 0f00000000;   	// 0
	mov.f32 	%f858, 0f00000000;   	// 0
	bra.uni 	$Lt_128_287746;
$Lt_128_288002:
	// Channel 0 blend term: %f870 = cb (from %f141), %f872 = cs (from %f137),
	// result in %f875.
	.loc	22	373	0
	mov.f32 	%f865, 0f00000000;   	// 0
	max.ftz.f32 	%f866, %f141, %f865;
	mov.f32 	%f867, 0f00000000;   	// 0
	max.ftz.f32 	%f868, %f137, %f867;
	mov.f32 	%f869, 0f3f800000;   	// 1
	min.ftz.f32 	%f870, %f866, %f869;
	mov.f32 	%f871, 0f3f800000;   	// 1
	min.ftz.f32 	%f872, %f868, %f871;
	mov.f32 	%f873, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p74, %f870, %f873;
	@!%p74 bra 	$Lt_128_288514;
	add.ftz.f32 	%f874, %f872, %f872;
	mul.ftz.f32 	%f875, %f870, %f874;
	bra.uni 	$Lt_128_288258;
$Lt_128_288514:
	mov.f32 	%f876, 0f3f800000;   	// 1
	sub.ftz.f32 	%f877, %f876, %f872;
	mov.f32 	%f878, 0f3f800000;   	// 1
	add.ftz.f32 	%f879, %f877, %f877;
	mov.f32 	%f880, 0f3f800000;   	// 1
	sub.ftz.f32 	%f881, %f880, %f870;
	mul.ftz.f32 	%f882, %f879, %f881;
	sub.ftz.f32 	%f875, %f878, %f882;
$Lt_128_288258:
	// Shared factors (computed once, reused by all three channels):
	// %f284 = 1 - b, %f285 = approx 1/ao, %f286 = a/ao, %f289 = 1 - a/ao.
	// Then channel 0 is composited into %f864.
	.loc	22	477	0
	mov.f32 	%f883, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f883, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f884, 0f3f800000;   	// 1
	mul.ftz.f32 	%f885, %f272, %f285;
	sub.ftz.f32 	%f289, %f884, %f885;
	mov.f32 	%f886, 0f00000000;   	// 0
	max.ftz.f32 	%f887, %f875, %f886;
	mov.f32 	%f888, 0f3f800000;   	// 1
	min.ftz.f32 	%f889, %f887, %f888;
	mul.ftz.f32 	%f890, %f144, %f889;
	fma.rn.ftz.f32 	%f891, %f137, %f284, %f890;
	mul.ftz.f32 	%f892, %f286, %f891;
	fma.rn.ftz.f32 	%f864, %f141, %f289, %f892;
	// Channel 1 blend term: %f898 = cb (from %f142), %f900 = cs (from %f138),
	// result in %f903.
	.loc	22	373	0
	mov.f32 	%f893, 0f00000000;   	// 0
	max.ftz.f32 	%f894, %f142, %f893;
	mov.f32 	%f895, 0f00000000;   	// 0
	max.ftz.f32 	%f896, %f138, %f895;
	mov.f32 	%f897, 0f3f800000;   	// 1
	min.ftz.f32 	%f898, %f894, %f897;
	mov.f32 	%f899, 0f3f800000;   	// 1
	min.ftz.f32 	%f900, %f896, %f899;
	mov.f32 	%f901, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p75, %f898, %f901;
	@!%p75 bra 	$Lt_128_289026;
	add.ftz.f32 	%f902, %f900, %f900;
	mul.ftz.f32 	%f903, %f898, %f902;
	bra.uni 	$Lt_128_288770;
$Lt_128_289026:
	mov.f32 	%f904, 0f3f800000;   	// 1
	sub.ftz.f32 	%f905, %f904, %f900;
	mov.f32 	%f906, 0f3f800000;   	// 1
	add.ftz.f32 	%f907, %f905, %f905;
	mov.f32 	%f908, 0f3f800000;   	// 1
	sub.ftz.f32 	%f909, %f908, %f898;
	mul.ftz.f32 	%f910, %f907, %f909;
	sub.ftz.f32 	%f903, %f906, %f910;
$Lt_128_288770:
	// Composite channel 1 into %f863.
	.loc	22	477	0
	mov.f32 	%f911, 0f00000000;   	// 0
	max.ftz.f32 	%f912, %f903, %f911;
	mov.f32 	%f913, 0f3f800000;   	// 1
	min.ftz.f32 	%f914, %f912, %f913;
	mul.ftz.f32 	%f915, %f144, %f914;
	fma.rn.ftz.f32 	%f916, %f138, %f284, %f915;
	mul.ftz.f32 	%f917, %f286, %f916;
	fma.rn.ftz.f32 	%f863, %f142, %f289, %f917;
	// Channel 2 blend term: %f923 = cb (from %f143), %f925 = cs (from %f139),
	// result in %f928.
	.loc	22	373	0
	mov.f32 	%f918, 0f00000000;   	// 0
	max.ftz.f32 	%f919, %f143, %f918;
	mov.f32 	%f920, 0f00000000;   	// 0
	max.ftz.f32 	%f921, %f139, %f920;
	mov.f32 	%f922, 0f3f800000;   	// 1
	min.ftz.f32 	%f923, %f919, %f922;
	mov.f32 	%f924, 0f3f800000;   	// 1
	min.ftz.f32 	%f925, %f921, %f924;
	mov.f32 	%f926, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p76, %f923, %f926;
	@!%p76 bra 	$Lt_128_289538;
	add.ftz.f32 	%f927, %f925, %f925;
	mul.ftz.f32 	%f928, %f923, %f927;
	bra.uni 	$Lt_128_289282;
$Lt_128_289538:
	mov.f32 	%f929, 0f3f800000;   	// 1
	sub.ftz.f32 	%f930, %f929, %f925;
	mov.f32 	%f931, 0f3f800000;   	// 1
	add.ftz.f32 	%f932, %f930, %f930;
	mov.f32 	%f933, 0f3f800000;   	// 1
	sub.ftz.f32 	%f934, %f933, %f923;
	mul.ftz.f32 	%f935, %f932, %f934;
	sub.ftz.f32 	%f928, %f931, %f935;
$Lt_128_289282:
	// Composite channel 2 into %f862.
	.loc	22	477	0
	mov.f32 	%f936, 0f00000000;   	// 0
	max.ftz.f32 	%f937, %f928, %f936;
	mov.f32 	%f938, 0f3f800000;   	// 1
	min.ftz.f32 	%f939, %f937, %f938;
	mul.ftz.f32 	%f940, %f144, %f939;
	fma.rn.ftz.f32 	%f941, %f139, %f284, %f940;
	mul.ftz.f32 	%f942, %f286, %f941;
	fma.rn.ftz.f32 	%f862, %f143, %f289, %f942;
$Lt_128_287746:
	// Commit the composited pixel into A and leave the case.
	.loc	6	202	0
	mov.f32 	%f137, %f864;
	mov.f32 	%f138, %f863;
	mov.f32 	%f139, %f862;
	mov.f32 	%f140, %f858;
	bra.uni 	$Lt_128_315394;
$Lt_128_3586:
	// Case: composite pixel A onto pixel B using a piecewise per-channel blend.
	//   A = (%f137, %f138, %f139, %f140), B = (%f141, %f142, %f143, %f144)
	//   a  = inAlphaGain * %f140, b = %f144
	//   ao = a + b - a*b                                  (%f275)
	// If ao - 8e-6 <= 0 all four outputs are zero. Otherwise, per channel i,
	// with cb = clamp01(B_i) and cs = clamp01(A_i):
	//   blend = cb + (2*cs - 1) * (cb - cb*cb)       if cs <= 0.5
	//         = cb + (2*cs - 1) * (sqrt(cb) - cb)    otherwise (approx sqrt)
	//   out_i = B_i * (1 - a/ao) + (a/ao) * (A_i * (1 - b) + b * clamp01(blend))
	// and the 4th output is ao. Results overwrite %f137..%f140.
	// NOTE(review): the square / square-root split keyed on the A channel has
	// the shape of a "soft light" mode; name inferred, source at .loc 22:478 /
	// 22:380 not visible - confirm.
	.loc	22	478	0
	ld.param.f32 	%f943, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f943, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f944, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f944;
	mov.f32 	%f945, %f275;
	mov.f32 	%f946, 0fb70637bd;   	// -8e-006
	add.ftz.f32 	%f947, %f275, %f946;
	mov.f32 	%f948, 0f00000000;   	// 0
	setp.le.ftz.f32 	%p77, %f947, %f948;
	@!%p77 bra 	$Lt_128_290050;
	// ao is (nearly) zero: emit an all-zero pixel and skip the division.
	mov.f32 	%f949, 0f00000000;   	// 0
	mov.f32 	%f950, 0f00000000;   	// 0
	mov.f32 	%f951, 0f00000000;   	// 0
	mov.f32 	%f945, 0f00000000;   	// 0
	bra.uni 	$Lt_128_289794;
$Lt_128_290050:
	// Channel 0 blend term: %f870 = cb (from %f141), %f872 = cs (from %f137),
	// %f958 = 2*cs - 1, result in %f962.
	.loc	22	380	0
	mov.f32 	%f952, 0f00000000;   	// 0
	max.ftz.f32 	%f866, %f141, %f952;
	mov.f32 	%f953, 0f00000000;   	// 0
	max.ftz.f32 	%f868, %f137, %f953;
	mov.f32 	%f954, 0f3f800000;   	// 1
	min.ftz.f32 	%f870, %f866, %f954;
	mov.f32 	%f955, 0f3f800000;   	// 1
	min.ftz.f32 	%f872, %f868, %f955;
	add.ftz.f32 	%f956, %f872, %f872;
	mov.f32 	%f957, 0fbf800000;   	// -1
	add.ftz.f32 	%f958, %f956, %f957;
	mov.f32 	%f959, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p78, %f872, %f959;
	@!%p78 bra 	$Lt_128_290562;
	mul.ftz.f32 	%f960, %f870, %f870;
	sub.ftz.f32 	%f961, %f870, %f960;
	fma.rn.ftz.f32 	%f962, %f958, %f961, %f870;
	bra.uni 	$Lt_128_290306;
$Lt_128_290562:
	sqrt.approx.ftz.f32 	%f963, %f870;
	sub.ftz.f32 	%f964, %f963, %f870;
	fma.rn.ftz.f32 	%f962, %f958, %f964, %f870;
$Lt_128_290306:
	// Shared factors (computed once, reused by all three channels):
	// %f284 = 1 - b, %f285 = approx 1/ao, %f286 = a/ao, %f289 = 1 - a/ao.
	// Then channel 0 is composited into %f951.
	.loc	22	478	0
	mov.f32 	%f965, 0f3f800000;   	// 1
	sub.ftz.f32 	%f284, %f965, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f966, 0f3f800000;   	// 1
	mul.ftz.f32 	%f967, %f272, %f285;
	sub.ftz.f32 	%f289, %f966, %f967;
	mov.f32 	%f968, 0f00000000;   	// 0
	max.ftz.f32 	%f969, %f962, %f968;
	mov.f32 	%f970, 0f3f800000;   	// 1
	min.ftz.f32 	%f971, %f969, %f970;
	mul.ftz.f32 	%f972, %f144, %f971;
	fma.rn.ftz.f32 	%f973, %f137, %f284, %f972;
	mul.ftz.f32 	%f974, %f286, %f973;
	fma.rn.ftz.f32 	%f951, %f141, %f289, %f974;
	// Channel 1 blend term: %f898 = cb (from %f142), %f900 = cs (from %f138),
	// %f981 = 2*cs - 1, result in %f985.
	.loc	22	380	0
	mov.f32 	%f975, 0f00000000;   	// 0
	max.ftz.f32 	%f894, %f142, %f975;
	mov.f32 	%f976, 0f00000000;   	// 0
	max.ftz.f32 	%f896, %f138, %f976;
	mov.f32 	%f977, 0f3f800000;   	// 1
	min.ftz.f32 	%f898, %f894, %f977;
	mov.f32 	%f978, 0f3f800000;   	// 1
	min.ftz.f32 	%f900, %f896, %f978;
	add.ftz.f32 	%f979, %f900, %f900;
	mov.f32 	%f980, 0fbf800000;   	// -1
	add.ftz.f32 	%f981, %f979, %f980;
	mov.f32 	%f982, 0f3f000000;   	// 0.5
	setp.le.ftz.f32 	%p79, %f900, %f982;
	@!%p79 bra 	$Lt_128_291074;
	mul.ftz.f32 	%f983, %f898, %f898;
	sub.ftz.f32 	%f984, %f898, %f983;
	fma.rn.ftz.f32 	%f985, %f981, %f984, %f898;
	bra.uni 	$Lt_128_290818;
$Lt_128_291074:
	sqrt.approx.ftz.f32 	%f986, %f898;
	sub.ftz.f32 	%f987, %f986, %f898;
	fma.rn.ftz.f32 	%f985, %f981, %f987, %f898;
$Lt_128_290818:
	// Composite channel 1 into %f950.
	.loc	22	478	0
	mov.f32 	%f988, 0f00000000;   	// 0
	max.ftz.f32 	%f989, %f985, %f988;
	mov.f32 	%f990, 0f3f800000;   	// 1
	min.ftz.f32 	%f991, %f989, %f990;
	mul.ftz.f32 	%f992, %f144, %f991;
	fma.rn.ftz.f32 	%f993, %f138, %f284, %f992;
	mul.ftz.f32 	%f994, %f286, %f993;
	fma.rn.ftz.f32 	%f950, %f142, %f289, %f994;
	// Channel 2 blend term: %f923 = cb (from %f143), %f925 = cs (from %f139),
	// %f1001 = 2*cs - 1, result in %f1005.
	.loc	22	380	0
	mov.f32 	%f995, 0f00000000;   	// 0
	max.ftz.f32 	%f919, %f143, %f995;
	mov.f32 	%f996, 0f00000000;   	// 0
	max.ftz.f32 	%f921, %f139, %f996;
	mov.f32 	%f997, 0f3f800000;   	// 1
	min.ftz.f32 	%f923, %f919, %f997;
	mov.f32 	%f998, 0f3f800000;   	// 1
	min.ftz.f32 	%f925, %f921, %f998;
	add.ftz.f32 	%f999, %f925, %f925;
	mov.f32 	%f1000, 0fbf800000;  	// -1
	add.ftz.f32 	%f1001, %f999, %f1000;
	mov.f32 	%f1002, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p80, %f925, %f1002;
	@!%p80 bra 	$Lt_128_291586;
	mul.ftz.f32 	%f1003, %f923, %f923;
	sub.ftz.f32 	%f1004, %f923, %f1003;
	fma.rn.ftz.f32 	%f1005, %f1001, %f1004, %f923;
	bra.uni 	$Lt_128_291330;
$Lt_128_291586:
	sqrt.approx.ftz.f32 	%f1006, %f923;
	sub.ftz.f32 	%f1007, %f1006, %f923;
	fma.rn.ftz.f32 	%f1005, %f1001, %f1007, %f923;
$Lt_128_291330:
	// Composite channel 2 into %f949.
	.loc	22	478	0
	mov.f32 	%f1008, 0f00000000;  	// 0
	max.ftz.f32 	%f1009, %f1005, %f1008;
	mov.f32 	%f1010, 0f3f800000;  	// 1
	min.ftz.f32 	%f1011, %f1009, %f1010;
	mul.ftz.f32 	%f1012, %f144, %f1011;
	fma.rn.ftz.f32 	%f1013, %f139, %f284, %f1012;
	mul.ftz.f32 	%f1014, %f286, %f1013;
	fma.rn.ftz.f32 	%f949, %f143, %f289, %f1014;
$Lt_128_289794:
	// Commit the composited pixel into A and leave the case.
	.loc	6	203	0
	mov.f32 	%f137, %f951;
	mov.f32 	%f138, %f950;
	mov.f32 	%f139, %f949;
	mov.f32 	%f140, %f945;
	bra.uni 	$Lt_128_315394;
$Lt_128_3842:
	// Section for source file 22, line 479 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   cx = clamp(x, 0, 1), cy = clamp(y, 0, 1)
	//   B  = (cx <= 0.5) ? cy * (2*cx) : 1 - 2*(1 - cx) * (1 - cy)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * clamp(B, 0, 1))
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	479	0
	ld.param.f32 	%f1015, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1015, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1016, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1016;
	// %f1017 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1017, %f275;
	mov.f32 	%f1018, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1019, %f275, %f1018;
	mov.f32 	%f1020, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p81, %f1019, %f1020;
	@!%p81 bra 	$Lt_128_292098;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1021, 0f00000000;  	// 0
	mov.f32 	%f1022, 0f00000000;  	// 0
	mov.f32 	%f1023, 0f00000000;  	// 0
	mov.f32 	%f1017, 0f00000000;  	// 0
	bra.uni 	$Lt_128_291842;
$Lt_128_292098:
	// Channel pair (%f137, %f141): clamp both to [0,1], then pick the B formula.
	.loc	22	386	0
	mov.f32 	%f1024, 0f00000000;  	// 0
	max.ftz.f32 	%f866, %f141, %f1024;
	mov.f32 	%f1025, 0f00000000;  	// 0
	max.ftz.f32 	%f868, %f137, %f1025;
	mov.f32 	%f1026, 0f3f800000;  	// 1
	min.ftz.f32 	%f870, %f866, %f1026;
	mov.f32 	%f1027, 0f3f800000;  	// 1
	min.ftz.f32 	%f872, %f868, %f1027;
	mov.f32 	%f1028, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p82, %f872, %f1028;
	@!%p82 bra 	$Lt_128_292610;
	// cx <= 0.5: B = cy * (2*cx)
	add.ftz.f32 	%f1029, %f872, %f872;
	mul.ftz.f32 	%f1030, %f870, %f1029;
	bra.uni 	$Lt_128_292354;
$Lt_128_292610:
	// cx > 0.5: B = 1 - 2*(1 - cx) * (1 - cy)
	mov.f32 	%f1031, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1032, %f1031, %f872;
	mov.f32 	%f1033, 0f3f800000;  	// 1
	add.ftz.f32 	%f1034, %f1032, %f1032;
	mov.f32 	%f1035, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1036, %f1035, %f870;
	mul.ftz.f32 	%f1037, %f1034, %f1036;
	sub.ftz.f32 	%f1030, %f1033, %f1037;
$Lt_128_292354:
	// Shared factors (%f284, %f285, %f286, %f289) are computed once here and
	// reused by the two remaining channels of this section.
	.loc	22	479	0
	mov.f32 	%f1038, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1038, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1039, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1040, %f272, %f285;
	sub.ftz.f32 	%f289, %f1039, %f1040;
	mov.f32 	%f1041, 0f00000000;  	// 0
	max.ftz.f32 	%f1042, %f1030, %f1041;
	mov.f32 	%f1043, 0f3f800000;  	// 1
	min.ftz.f32 	%f1044, %f1042, %f1043;
	mul.ftz.f32 	%f1045, %f144, %f1044;
	fma.rn.ftz.f32 	%f1046, %f137, %f284, %f1045;
	mul.ftz.f32 	%f1047, %f286, %f1046;
	fma.rn.ftz.f32 	%f1023, %f141, %f289, %f1047;
	// Channel pair (%f138, %f142): same computation, result in %f1022.
	.loc	22	386	0
	mov.f32 	%f1048, 0f00000000;  	// 0
	max.ftz.f32 	%f894, %f142, %f1048;
	mov.f32 	%f1049, 0f00000000;  	// 0
	max.ftz.f32 	%f896, %f138, %f1049;
	mov.f32 	%f1050, 0f3f800000;  	// 1
	min.ftz.f32 	%f898, %f894, %f1050;
	mov.f32 	%f1051, 0f3f800000;  	// 1
	min.ftz.f32 	%f900, %f896, %f1051;
	mov.f32 	%f1052, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p83, %f900, %f1052;
	@!%p83 bra 	$Lt_128_293122;
	add.ftz.f32 	%f1053, %f900, %f900;
	mul.ftz.f32 	%f1054, %f898, %f1053;
	bra.uni 	$Lt_128_292866;
$Lt_128_293122:
	mov.f32 	%f1055, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1056, %f1055, %f900;
	mov.f32 	%f1057, 0f3f800000;  	// 1
	add.ftz.f32 	%f1058, %f1056, %f1056;
	mov.f32 	%f1059, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1060, %f1059, %f898;
	mul.ftz.f32 	%f1061, %f1058, %f1060;
	sub.ftz.f32 	%f1054, %f1057, %f1061;
$Lt_128_292866:
	.loc	22	479	0
	mov.f32 	%f1062, 0f00000000;  	// 0
	max.ftz.f32 	%f1063, %f1054, %f1062;
	mov.f32 	%f1064, 0f3f800000;  	// 1
	min.ftz.f32 	%f1065, %f1063, %f1064;
	mul.ftz.f32 	%f1066, %f144, %f1065;
	fma.rn.ftz.f32 	%f1067, %f138, %f284, %f1066;
	mul.ftz.f32 	%f1068, %f286, %f1067;
	fma.rn.ftz.f32 	%f1022, %f142, %f289, %f1068;
	// Channel pair (%f139, %f143): same computation, result in %f1021.
	.loc	22	386	0
	mov.f32 	%f1069, 0f00000000;  	// 0
	max.ftz.f32 	%f919, %f143, %f1069;
	mov.f32 	%f1070, 0f00000000;  	// 0
	max.ftz.f32 	%f921, %f139, %f1070;
	mov.f32 	%f1071, 0f3f800000;  	// 1
	min.ftz.f32 	%f923, %f919, %f1071;
	mov.f32 	%f1072, 0f3f800000;  	// 1
	min.ftz.f32 	%f925, %f921, %f1072;
	mov.f32 	%f1073, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p84, %f925, %f1073;
	@!%p84 bra 	$Lt_128_293634;
	add.ftz.f32 	%f1074, %f925, %f925;
	mul.ftz.f32 	%f1075, %f923, %f1074;
	bra.uni 	$Lt_128_293378;
$Lt_128_293634:
	mov.f32 	%f1076, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1077, %f1076, %f925;
	mov.f32 	%f1078, 0f3f800000;  	// 1
	add.ftz.f32 	%f1079, %f1077, %f1077;
	mov.f32 	%f1080, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1081, %f1080, %f923;
	mul.ftz.f32 	%f1082, %f1079, %f1081;
	sub.ftz.f32 	%f1075, %f1078, %f1082;
$Lt_128_293378:
	.loc	22	479	0
	mov.f32 	%f1083, 0f00000000;  	// 0
	max.ftz.f32 	%f1084, %f1075, %f1083;
	mov.f32 	%f1085, 0f3f800000;  	// 1
	min.ftz.f32 	%f1086, %f1084, %f1085;
	mul.ftz.f32 	%f1087, %f144, %f1086;
	fma.rn.ftz.f32 	%f1088, %f139, %f284, %f1087;
	mul.ftz.f32 	%f1089, %f286, %f1088;
	fma.rn.ftz.f32 	%f1021, %f143, %f289, %f1089;
$Lt_128_291842:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	204	0
	mov.f32 	%f137, %f1023;
	mov.f32 	%f138, %f1022;
	mov.f32 	%f139, %f1021;
	mov.f32 	%f140, %f1017;
	bra.uni 	$Lt_128_315394;
$Lt_128_4098:
	// Section for source file 22, line 480 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   cy = clamp(y, 0, 1), cx = min(max(x, 1e-6), 0.999999)
	//   B  = (cx <= 0.5) ? clamp(1 - (1 - cy) / (2*cx), 0, 1)
	//                    : clamp(cy / (2*(1 - cx)), 0, 1)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	// The 1e-6 / 0.999999 bounds keep both div.approx denominators non-zero.
	.loc	22	480	0
	ld.param.f32 	%f1090, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1090, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1091, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1091;
	// %f1092 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1092, %f275;
	mov.f32 	%f1093, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1094, %f275, %f1093;
	mov.f32 	%f1095, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p85, %f1094, %f1095;
	@!%p85 bra 	$Lt_128_294146;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1096, 0f00000000;  	// 0
	mov.f32 	%f1097, 0f00000000;  	// 0
	mov.f32 	%f1098, 0f00000000;  	// 0
	mov.f32 	%f1092, 0f00000000;  	// 0
	bra.uni 	$Lt_128_293890;
$Lt_128_294146:
	// Channel pair (%f137, %f141).
	.loc	22	431	0
	mov.f32 	%f1099, 0f00000000;  	// 0
	max.ftz.f32 	%f866, %f141, %f1099;
	mov.f32 	%f1100, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1101, %f137, %f1100;
	mov.f32 	%f1102, 0f3f800000;  	// 1
	min.ftz.f32 	%f870, %f866, %f1102;
	mov.f32 	%f1103, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1104, %f1101, %f1103;
	mov.f32 	%f1105, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p86, %f1104, %f1105;
	@!%p86 bra 	$Lt_128_244226;
	// cx <= 0.5: B = clamp(1 - (1 - cy) / (2*cx), 0, 1)
	.loc	22	433	0
	mov.f32 	%f1106, 0f3f800000;  	// 1
	mov.f32 	%f1107, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1108, %f1107, %f870;
	add.ftz.f32 	%f1109, %f1104, %f1104;
	div.approx.ftz.f32 	%f1110, %f1108, %f1109;
	sub.ftz.f32 	%f1111, %f1106, %f1110;
	mov.f32 	%f1112, 0f00000000;  	// 0
	max.ftz.f32 	%f1113, %f1111, %f1112;
	mov.f32 	%f1114, 0f3f800000;  	// 1
	min.ftz.f32 	%f1115, %f1113, %f1114;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_46;
$Lt_128_244226:
	// cx > 0.5: B = clamp(cy / (2*(1 - cx)), 0, 1)
	.loc	22	437	0
	mov.f32 	%f1116, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1117, %f1116, %f1104;
	add.ftz.f32 	%f1118, %f1117, %f1117;
	div.approx.ftz.f32 	%f1119, %f870, %f1118;
	mov.f32 	%f1120, 0f00000000;  	// 0
	max.ftz.f32 	%f1121, %f1119, %f1120;
	mov.f32 	%f1122, 0f3f800000;  	// 1
	min.ftz.f32 	%f1115, %f1121, %f1122;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_46:
	// Shared factors (%f284, %f285, %f286, %f289) are computed once here and
	// reused by the two remaining channels of this section.
	.loc	22	480	0
	mov.f32 	%f1123, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1123, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1124, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1125, %f272, %f285;
	sub.ftz.f32 	%f289, %f1124, %f1125;
	mul.ftz.f32 	%f1126, %f1115, %f144;
	fma.rn.ftz.f32 	%f1127, %f137, %f284, %f1126;
	mul.ftz.f32 	%f1128, %f286, %f1127;
	fma.rn.ftz.f32 	%f1098, %f141, %f289, %f1128;
	// Channel pair (%f138, %f142): same computation, result in %f1097.
	.loc	22	431	0
	mov.f32 	%f1129, 0f00000000;  	// 0
	max.ftz.f32 	%f894, %f142, %f1129;
	mov.f32 	%f1130, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1131, %f138, %f1130;
	mov.f32 	%f1132, 0f3f800000;  	// 1
	min.ftz.f32 	%f898, %f894, %f1132;
	mov.f32 	%f1133, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1134, %f1131, %f1133;
	mov.f32 	%f1135, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p87, %f1134, %f1135;
	@!%p87 bra 	$Lt_128_244482;
	.loc	22	433	0
	mov.f32 	%f1136, 0f3f800000;  	// 1
	mov.f32 	%f1137, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1138, %f1137, %f898;
	add.ftz.f32 	%f1139, %f1134, %f1134;
	div.approx.ftz.f32 	%f1140, %f1138, %f1139;
	sub.ftz.f32 	%f1141, %f1136, %f1140;
	mov.f32 	%f1142, 0f00000000;  	// 0
	max.ftz.f32 	%f1143, %f1141, %f1142;
	mov.f32 	%f1144, 0f3f800000;  	// 1
	min.ftz.f32 	%f1145, %f1143, %f1144;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_44;
$Lt_128_244482:
	.loc	22	437	0
	mov.f32 	%f1146, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1147, %f1146, %f1134;
	add.ftz.f32 	%f1148, %f1147, %f1147;
	div.approx.ftz.f32 	%f1149, %f898, %f1148;
	mov.f32 	%f1150, 0f00000000;  	// 0
	max.ftz.f32 	%f1151, %f1149, %f1150;
	mov.f32 	%f1152, 0f3f800000;  	// 1
	min.ftz.f32 	%f1145, %f1151, %f1152;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_44:
	.loc	22	480	0
	mul.ftz.f32 	%f1153, %f1145, %f144;
	fma.rn.ftz.f32 	%f1154, %f138, %f284, %f1153;
	mul.ftz.f32 	%f1155, %f286, %f1154;
	fma.rn.ftz.f32 	%f1097, %f142, %f289, %f1155;
	// Channel pair (%f139, %f143): same computation, result in %f1096.
	.loc	22	431	0
	mov.f32 	%f1156, 0f00000000;  	// 0
	max.ftz.f32 	%f919, %f143, %f1156;
	mov.f32 	%f1157, 0f358637bd;  	// 1e-006
	max.ftz.f32 	%f1158, %f139, %f1157;
	mov.f32 	%f1159, 0f3f800000;  	// 1
	min.ftz.f32 	%f923, %f919, %f1159;
	mov.f32 	%f1160, 0f3f7fffef;  	// 0.999999
	min.ftz.f32 	%f1161, %f1158, %f1160;
	mov.f32 	%f1162, 0f3f000000;  	// 0.5
	setp.le.ftz.f32 	%p88, %f1161, %f1162;
	@!%p88 bra 	$Lt_128_244738;
	.loc	22	433	0
	mov.f32 	%f1163, 0f3f800000;  	// 1
	mov.f32 	%f1164, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1165, %f1164, %f923;
	add.ftz.f32 	%f1166, %f1161, %f1161;
	div.approx.ftz.f32 	%f1167, %f1165, %f1166;
	sub.ftz.f32 	%f1168, %f1163, %f1167;
	mov.f32 	%f1169, 0f00000000;  	// 0
	max.ftz.f32 	%f1170, %f1168, %f1169;
	mov.f32 	%f1171, 0f3f800000;  	// 1
	min.ftz.f32 	%f1172, %f1170, %f1171;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_42;
$Lt_128_244738:
	.loc	22	437	0
	mov.f32 	%f1173, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1174, %f1173, %f1161;
	add.ftz.f32 	%f1175, %f1174, %f1174;
	div.approx.ftz.f32 	%f1176, %f923, %f1175;
	mov.f32 	%f1177, 0f00000000;  	// 0
	max.ftz.f32 	%f1178, %f1176, %f1177;
	mov.f32 	%f1179, 0f3f800000;  	// 1
	min.ftz.f32 	%f1172, %f1178, %f1179;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_42:
	.loc	22	480	0
	mul.ftz.f32 	%f1180, %f1172, %f144;
	fma.rn.ftz.f32 	%f1181, %f139, %f284, %f1180;
	mul.ftz.f32 	%f1182, %f286, %f1181;
	fma.rn.ftz.f32 	%f1096, %f143, %f289, %f1182;
$Lt_128_293890:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	205	0
	mov.f32 	%f137, %f1098;
	mov.f32 	%f138, %f1097;
	mov.f32 	%f139, %f1096;
	mov.f32 	%f140, %f1092;
	bra.uni 	$Lt_128_315394;
$Lt_128_4354:
	// Section for source file 22, line 481 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   B   = 2*clamp(x, 0, 1) + clamp(y, 0, 1) - 1      (B itself is not clamped)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	481	0
	ld.param.f32 	%f1183, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1183, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1184, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1184;
	// %f1185 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1185, %f275;
	mov.f32 	%f1186, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1187, %f275, %f1186;
	mov.f32 	%f1188, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p89, %f1187, %f1188;
	@!%p89 bra 	$Lt_128_294658;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1189, 0f00000000;  	// 0
	mov.f32 	%f1190, 0f00000000;  	// 0
	mov.f32 	%f1191, 0f00000000;  	// 0
	mov.f32 	%f1185, 0f00000000;  	// 0
	bra.uni 	$Lt_128_294402;
$Lt_128_294658:
	// Shared factors plus channel pair (%f137, %f141); result in %f1191.
	mov.f32 	%f1192, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1192, %f144;
	mov.f32 	%f1193, 0f00000000;  	// 0
	max.ftz.f32 	%f868, %f137, %f1193;
	mov.f32 	%f1194, 0f3f800000;  	// 1
	min.ftz.f32 	%f872, %f868, %f1194;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1195, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1196, %f272, %f285;
	sub.ftz.f32 	%f289, %f1195, %f1196;
	add.ftz.f32 	%f1197, %f872, %f872;
	mov.f32 	%f1198, 0f00000000;  	// 0
	max.ftz.f32 	%f1199, %f141, %f1198;
	mov.f32 	%f1200, 0f3f800000;  	// 1
	min.ftz.f32 	%f1201, %f1199, %f1200;
	add.ftz.f32 	%f1202, %f1197, %f1201;
	mov.f32 	%f1203, 0fbf800000;  	// -1
	add.ftz.f32 	%f1204, %f1202, %f1203;
	mul.ftz.f32 	%f1205, %f144, %f1204;
	fma.rn.ftz.f32 	%f1206, %f137, %f284, %f1205;
	mul.ftz.f32 	%f1207, %f286, %f1206;
	fma.rn.ftz.f32 	%f1191, %f141, %f289, %f1207;
	// Channel pair (%f138, %f142): same computation, result in %f1190.
	mov.f32 	%f1208, 0f00000000;  	// 0
	max.ftz.f32 	%f896, %f138, %f1208;
	mov.f32 	%f1209, 0f3f800000;  	// 1
	min.ftz.f32 	%f900, %f896, %f1209;
	add.ftz.f32 	%f1210, %f900, %f900;
	mov.f32 	%f1211, 0f00000000;  	// 0
	max.ftz.f32 	%f1212, %f142, %f1211;
	mov.f32 	%f1213, 0f3f800000;  	// 1
	min.ftz.f32 	%f1214, %f1212, %f1213;
	add.ftz.f32 	%f1215, %f1210, %f1214;
	mov.f32 	%f1216, 0fbf800000;  	// -1
	add.ftz.f32 	%f1217, %f1215, %f1216;
	mul.ftz.f32 	%f1218, %f144, %f1217;
	fma.rn.ftz.f32 	%f1219, %f138, %f284, %f1218;
	mul.ftz.f32 	%f1220, %f286, %f1219;
	fma.rn.ftz.f32 	%f1190, %f142, %f289, %f1220;
	// Channel pair (%f139, %f143): same computation, result in %f1189.
	mov.f32 	%f1221, 0f00000000;  	// 0
	max.ftz.f32 	%f921, %f139, %f1221;
	mov.f32 	%f1222, 0f3f800000;  	// 1
	min.ftz.f32 	%f925, %f921, %f1222;
	add.ftz.f32 	%f1223, %f925, %f925;
	mov.f32 	%f1224, 0f00000000;  	// 0
	max.ftz.f32 	%f1225, %f143, %f1224;
	mov.f32 	%f1226, 0f3f800000;  	// 1
	min.ftz.f32 	%f1227, %f1225, %f1226;
	add.ftz.f32 	%f1228, %f1223, %f1227;
	mov.f32 	%f1229, 0fbf800000;  	// -1
	add.ftz.f32 	%f1230, %f1228, %f1229;
	mul.ftz.f32 	%f1231, %f144, %f1230;
	fma.rn.ftz.f32 	%f1232, %f139, %f284, %f1231;
	mul.ftz.f32 	%f1233, %f286, %f1232;
	fma.rn.ftz.f32 	%f1189, %f143, %f289, %f1233;
$Lt_128_294402:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	206	0
	mov.f32 	%f137, %f1191;
	mov.f32 	%f138, %f1190;
	mov.f32 	%f139, %f1189;
	mov.f32 	%f140, %f1185;
	bra.uni 	$Lt_128_315394;
$Lt_128_4610:
	// Section for source file 22, line 482 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   cx = clamp(x, 0, 1), cy = clamp(y, 0, 1)
	//   B  = (2*cx - 1 > cy) ? 2*cx - 1 : (2*cx < cy) ? 2*cx : cy
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	482	0
	ld.param.f32 	%f1234, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1234, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1235, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1235;
	// %f1236 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1236, %f275;
	mov.f32 	%f1237, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1238, %f275, %f1237;
	mov.f32 	%f1239, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p90, %f1238, %f1239;
	@!%p90 bra 	$Lt_128_295170;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1240, 0f00000000;  	// 0
	mov.f32 	%f1241, 0f00000000;  	// 0
	mov.f32 	%f1242, 0f00000000;  	// 0
	mov.f32 	%f1236, 0f00000000;  	// 0
	bra.uni 	$Lt_128_294914;
$Lt_128_295170:
	// Channel pair (%f137, %f141): %f1247 = 2*cx, %f1249 = 2*cx - 1.
	.loc	22	450	0
	mov.f32 	%f1243, 0f00000000;  	// 0
	max.ftz.f32 	%f866, %f141, %f1243;
	mov.f32 	%f1244, 0f00000000;  	// 0
	max.ftz.f32 	%f868, %f137, %f1244;
	mov.f32 	%f1245, 0f3f800000;  	// 1
	min.ftz.f32 	%f870, %f866, %f1245;
	mov.f32 	%f1246, 0f3f800000;  	// 1
	min.ftz.f32 	%f872, %f868, %f1246;
	add.ftz.f32 	%f1247, %f872, %f872;
	mov.f32 	%f1248, 0fbf800000;  	// -1
	add.ftz.f32 	%f1249, %f1247, %f1248;
	setp.gt.ftz.f32 	%p91, %f1249, %f870;
	@!%p91 bra 	$Lt_128_245506;
	// 2*cx - 1 > cy: B = 2*cx - 1
	.loc	22	452	0
	mov.f32 	%f1250, %f1249;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_40;
$Lt_128_245506:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p92, %f1247, %f870;
	@!%p92 bra 	$Lt_128_245762;
	// 2*cx < cy: B = 2*cx
	.loc	22	456	0
	mov.f32 	%f1250, %f1247;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_40;
$Lt_128_245762:
	// Otherwise: B = cy
	.loc	22	460	0
	mov.f32 	%f1250, %f870;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_40:
	// Shared factors (%f284, %f285, %f286, %f289) are computed once here and
	// reused by the two remaining channels of this section.
	.loc	22	482	0
	mov.f32 	%f1251, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1251, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1252, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1253, %f272, %f285;
	sub.ftz.f32 	%f289, %f1252, %f1253;
	mul.ftz.f32 	%f1254, %f1250, %f144;
	fma.rn.ftz.f32 	%f1255, %f137, %f284, %f1254;
	mul.ftz.f32 	%f1256, %f286, %f1255;
	fma.rn.ftz.f32 	%f1242, %f141, %f289, %f1256;
	// Channel pair (%f138, %f142): same computation, result in %f1241.
	.loc	22	450	0
	mov.f32 	%f1257, 0f00000000;  	// 0
	max.ftz.f32 	%f894, %f142, %f1257;
	mov.f32 	%f1258, 0f00000000;  	// 0
	max.ftz.f32 	%f896, %f138, %f1258;
	mov.f32 	%f1259, 0f3f800000;  	// 1
	min.ftz.f32 	%f898, %f894, %f1259;
	mov.f32 	%f1260, 0f3f800000;  	// 1
	min.ftz.f32 	%f900, %f896, %f1260;
	add.ftz.f32 	%f1261, %f900, %f900;
	mov.f32 	%f1262, 0fbf800000;  	// -1
	add.ftz.f32 	%f1263, %f1261, %f1262;
	setp.gt.ftz.f32 	%p93, %f1263, %f898;
	@!%p93 bra 	$Lt_128_246018;
	.loc	22	452	0
	mov.f32 	%f1264, %f1263;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_38;
$Lt_128_246018:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p94, %f1261, %f898;
	@!%p94 bra 	$Lt_128_246274;
	.loc	22	456	0
	mov.f32 	%f1264, %f1261;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_38;
$Lt_128_246274:
	.loc	22	460	0
	mov.f32 	%f1264, %f898;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_38:
	.loc	22	482	0
	mul.ftz.f32 	%f1265, %f1264, %f144;
	fma.rn.ftz.f32 	%f1266, %f138, %f284, %f1265;
	mul.ftz.f32 	%f1267, %f286, %f1266;
	fma.rn.ftz.f32 	%f1241, %f142, %f289, %f1267;
	// Channel pair (%f139, %f143): same computation, result in %f1240.
	.loc	22	450	0
	mov.f32 	%f1268, 0f00000000;  	// 0
	max.ftz.f32 	%f919, %f143, %f1268;
	mov.f32 	%f1269, 0f00000000;  	// 0
	max.ftz.f32 	%f921, %f139, %f1269;
	mov.f32 	%f1270, 0f3f800000;  	// 1
	min.ftz.f32 	%f923, %f919, %f1270;
	mov.f32 	%f1271, 0f3f800000;  	// 1
	min.ftz.f32 	%f925, %f921, %f1271;
	add.ftz.f32 	%f1272, %f925, %f925;
	mov.f32 	%f1273, 0fbf800000;  	// -1
	add.ftz.f32 	%f1274, %f1272, %f1273;
	setp.gt.ftz.f32 	%p95, %f1274, %f923;
	@!%p95 bra 	$Lt_128_246530;
	.loc	22	452	0
	mov.f32 	%f1275, %f1274;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_36;
$Lt_128_246530:
	.loc	22	454	0
	setp.lt.ftz.f32 	%p96, %f1272, %f923;
	@!%p96 bra 	$Lt_128_246786;
	.loc	22	456	0
	mov.f32 	%f1275, %f1272;
	bra.uni 	$LDWendi__Z5ClampIfET_S0_S0_S0__305_36;
$Lt_128_246786:
	.loc	22	460	0
	mov.f32 	%f1275, %f923;
$LDWendi__Z5ClampIfET_S0_S0_S0__305_36:
	.loc	22	482	0
	mul.ftz.f32 	%f1276, %f1275, %f144;
	fma.rn.ftz.f32 	%f1277, %f139, %f284, %f1276;
	mul.ftz.f32 	%f1278, %f286, %f1277;
	fma.rn.ftz.f32 	%f1240, %f143, %f289, %f1278;
$Lt_128_294914:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	207	0
	mov.f32 	%f137, %f1242;
	mov.f32 	%f138, %f1241;
	mov.f32 	%f139, %f1240;
	mov.f32 	%f140, %f1236;
	bra.uni 	$Lt_128_315394;
$Lt_128_4866:
	// Section for source file 22, line 483 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   B   = (x < 1 - y) ? 0 : 1          (inputs are used unclamped)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	483	0
	ld.param.f32 	%f1279, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1279, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1280, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1280;
	// %f1281 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1281, %f275;
	mov.f32 	%f1282, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1283, %f275, %f1282;
	mov.f32 	%f1284, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p97, %f1283, %f1284;
	@!%p97 bra 	$Lt_128_295682;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1285, 0f00000000;  	// 0
	mov.f32 	%f1286, 0f00000000;  	// 0
	mov.f32 	%f1287, 0f00000000;  	// 0
	mov.f32 	%f1281, 0f00000000;  	// 0
	bra.uni 	$Lt_128_295426;
$Lt_128_295682:
	// Shared factors used by all three channels.
	mov.f32 	%f1288, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1288, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1289, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1290, %f272, %f285;
	sub.ftz.f32 	%f289, %f1289, %f1290;
	// Channel pair (%f137, %f141): selp yields 0 when the predicate holds, else 1.
	mov.f32 	%f1291, 0f00000000;  	// 0
	mov.f32 	%f1292, 0f3f800000;  	// 1
	mov.f32 	%f1293, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1294, %f1293, %f141;
	setp.lt.ftz.f32 	%p98, %f137, %f1294;
	selp.f32 	%f1295, %f1291, %f1292, %p98;
	mul.ftz.f32 	%f1296, %f1295, %f144;
	fma.rn.ftz.f32 	%f1297, %f137, %f284, %f1296;
	mul.ftz.f32 	%f1298, %f286, %f1297;
	fma.rn.ftz.f32 	%f1287, %f141, %f289, %f1298;
	// Channel pair (%f138, %f142): same computation, result in %f1286.
	mov.f32 	%f1299, 0f00000000;  	// 0
	mov.f32 	%f1300, 0f3f800000;  	// 1
	mov.f32 	%f1301, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1302, %f1301, %f142;
	setp.lt.ftz.f32 	%p99, %f138, %f1302;
	selp.f32 	%f1303, %f1299, %f1300, %p99;
	mul.ftz.f32 	%f1304, %f1303, %f144;
	fma.rn.ftz.f32 	%f1305, %f138, %f284, %f1304;
	mul.ftz.f32 	%f1306, %f286, %f1305;
	fma.rn.ftz.f32 	%f1286, %f142, %f289, %f1306;
	// Channel pair (%f139, %f143): same computation, result in %f1285.
	mov.f32 	%f1307, 0f00000000;  	// 0
	mov.f32 	%f1308, 0f3f800000;  	// 1
	mov.f32 	%f1309, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1310, %f1309, %f143;
	setp.lt.ftz.f32 	%p100, %f139, %f1310;
	selp.f32 	%f1311, %f1307, %f1308, %p100;
	mul.ftz.f32 	%f1312, %f1311, %f144;
	fma.rn.ftz.f32 	%f1313, %f139, %f284, %f1312;
	mul.ftz.f32 	%f1314, %f286, %f1313;
	fma.rn.ftz.f32 	%f1285, %f143, %f289, %f1314;
$Lt_128_295426:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	208	0
	mov.f32 	%f137, %f1287;
	mov.f32 	%f138, %f1286;
	mov.f32 	%f139, %f1285;
	mov.f32 	%f140, %f1281;
	bra.uni 	$Lt_128_315394;
$Lt_128_5122:
	// Section for source file 22, line 484 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   B   = |x - y|                      (inputs are used unclamped)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	484	0
	ld.param.f32 	%f1315, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1315, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1316, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1316;
	// %f1317 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1317, %f275;
	mov.f32 	%f1318, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1319, %f275, %f1318;
	mov.f32 	%f1320, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p101, %f1319, %f1320;
	@!%p101 bra 	$Lt_128_296194;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1321, 0f00000000;  	// 0
	mov.f32 	%f1322, 0f00000000;  	// 0
	mov.f32 	%f1323, 0f00000000;  	// 0
	mov.f32 	%f1317, 0f00000000;  	// 0
	bra.uni 	$Lt_128_295938;
$Lt_128_296194:
	// Shared factors used by all three channels.
	mov.f32 	%f1324, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1324, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1325, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1326, %f272, %f285;
	sub.ftz.f32 	%f289, %f1325, %f1326;
	// Channel pair (%f137, %f141): result in %f1323.
	sub.ftz.f32 	%f1327, %f137, %f141;
	abs.ftz.f32 	%f1328, %f1327;
	mul.ftz.f32 	%f1329, %f144, %f1328;
	fma.rn.ftz.f32 	%f1330, %f137, %f284, %f1329;
	mul.ftz.f32 	%f1331, %f286, %f1330;
	fma.rn.ftz.f32 	%f1323, %f141, %f289, %f1331;
	// Channel pair (%f138, %f142): result in %f1322.
	sub.ftz.f32 	%f1332, %f138, %f142;
	abs.ftz.f32 	%f1333, %f1332;
	mul.ftz.f32 	%f1334, %f144, %f1333;
	fma.rn.ftz.f32 	%f1335, %f138, %f284, %f1334;
	mul.ftz.f32 	%f1336, %f286, %f1335;
	fma.rn.ftz.f32 	%f1322, %f142, %f289, %f1336;
	// Channel pair (%f139, %f143): result in %f1321.
	sub.ftz.f32 	%f1337, %f139, %f143;
	abs.ftz.f32 	%f1338, %f1337;
	mul.ftz.f32 	%f1339, %f144, %f1338;
	fma.rn.ftz.f32 	%f1340, %f139, %f284, %f1339;
	mul.ftz.f32 	%f1341, %f286, %f1340;
	fma.rn.ftz.f32 	%f1321, %f143, %f289, %f1341;
$Lt_128_295938:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	209	0
	mov.f32 	%f137, %f1323;
	mov.f32 	%f138, %f1322;
	mov.f32 	%f139, %f1321;
	mov.f32 	%f140, %f1317;
	bra.uni 	$Lt_128_315394;
$Lt_128_5378:
	// Section for source file 22, line 485 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   cx = clamp(x, 0, 1), cy = clamp(y, 0, 1)
	//   B  = clamp(cx + cy - cy * (2*cx), 0, 1)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	485	0
	ld.param.f32 	%f1342, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1342, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1343, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1343;
	// %f1344 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1344, %f275;
	mov.f32 	%f1345, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1346, %f275, %f1345;
	mov.f32 	%f1347, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p102, %f1346, %f1347;
	@!%p102 bra 	$Lt_128_296706;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1348, 0f00000000;  	// 0
	mov.f32 	%f1349, 0f00000000;  	// 0
	mov.f32 	%f1350, 0f00000000;  	// 0
	mov.f32 	%f1344, 0f00000000;  	// 0
	bra.uni 	$Lt_128_296450;
$Lt_128_296706:
	// Shared factors plus channel pair (%f137, %f141); result in %f1350.
	mov.f32 	%f1351, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1351, %f144;
	mov.f32 	%f1352, 0f00000000;  	// 0
	max.ftz.f32 	%f866, %f141, %f1352;
	mov.f32 	%f1353, 0f00000000;  	// 0
	max.ftz.f32 	%f868, %f137, %f1353;
	mov.f32 	%f1354, 0f3f800000;  	// 1
	min.ftz.f32 	%f870, %f866, %f1354;
	mov.f32 	%f1355, 0f3f800000;  	// 1
	min.ftz.f32 	%f872, %f868, %f1355;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1356, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1357, %f272, %f285;
	sub.ftz.f32 	%f289, %f1356, %f1357;
	add.ftz.f32 	%f1358, %f872, %f870;
	add.ftz.f32 	%f1359, %f872, %f872;
	mul.ftz.f32 	%f1360, %f870, %f1359;
	sub.ftz.f32 	%f1361, %f1358, %f1360;
	mov.f32 	%f1362, 0f00000000;  	// 0
	max.ftz.f32 	%f1363, %f1361, %f1362;
	mov.f32 	%f1364, 0f3f800000;  	// 1
	min.ftz.f32 	%f1365, %f1363, %f1364;
	mul.ftz.f32 	%f1366, %f144, %f1365;
	fma.rn.ftz.f32 	%f1367, %f137, %f284, %f1366;
	mul.ftz.f32 	%f1368, %f286, %f1367;
	fma.rn.ftz.f32 	%f1350, %f141, %f289, %f1368;
	// Channel pair (%f138, %f142): same computation, result in %f1349.
	mov.f32 	%f1369, 0f00000000;  	// 0
	max.ftz.f32 	%f894, %f142, %f1369;
	mov.f32 	%f1370, 0f00000000;  	// 0
	max.ftz.f32 	%f896, %f138, %f1370;
	mov.f32 	%f1371, 0f3f800000;  	// 1
	min.ftz.f32 	%f898, %f894, %f1371;
	mov.f32 	%f1372, 0f3f800000;  	// 1
	min.ftz.f32 	%f900, %f896, %f1372;
	add.ftz.f32 	%f1373, %f900, %f898;
	add.ftz.f32 	%f1374, %f900, %f900;
	mul.ftz.f32 	%f1375, %f898, %f1374;
	sub.ftz.f32 	%f1376, %f1373, %f1375;
	mov.f32 	%f1377, 0f00000000;  	// 0
	max.ftz.f32 	%f1378, %f1376, %f1377;
	mov.f32 	%f1379, 0f3f800000;  	// 1
	min.ftz.f32 	%f1380, %f1378, %f1379;
	mul.ftz.f32 	%f1381, %f144, %f1380;
	fma.rn.ftz.f32 	%f1382, %f138, %f284, %f1381;
	mul.ftz.f32 	%f1383, %f286, %f1382;
	fma.rn.ftz.f32 	%f1349, %f142, %f289, %f1383;
	// Channel pair (%f139, %f143): same computation, result in %f1348.
	mov.f32 	%f1384, 0f00000000;  	// 0
	max.ftz.f32 	%f919, %f143, %f1384;
	mov.f32 	%f1385, 0f00000000;  	// 0
	max.ftz.f32 	%f921, %f139, %f1385;
	mov.f32 	%f1386, 0f3f800000;  	// 1
	min.ftz.f32 	%f923, %f919, %f1386;
	mov.f32 	%f1387, 0f3f800000;  	// 1
	min.ftz.f32 	%f925, %f921, %f1387;
	add.ftz.f32 	%f1388, %f925, %f923;
	add.ftz.f32 	%f1389, %f925, %f925;
	mul.ftz.f32 	%f1390, %f923, %f1389;
	sub.ftz.f32 	%f1391, %f1388, %f1390;
	mov.f32 	%f1392, 0f00000000;  	// 0
	max.ftz.f32 	%f1393, %f1391, %f1392;
	mov.f32 	%f1394, 0f3f800000;  	// 1
	min.ftz.f32 	%f1395, %f1393, %f1394;
	mul.ftz.f32 	%f1396, %f144, %f1395;
	fma.rn.ftz.f32 	%f1397, %f139, %f284, %f1396;
	mul.ftz.f32 	%f1398, %f286, %f1397;
	fma.rn.ftz.f32 	%f1348, %f143, %f289, %f1398;
$Lt_128_296450:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	210	0
	mov.f32 	%f137, %f1350;
	mov.f32 	%f138, %f1349;
	mov.f32 	%f139, %f1348;
	mov.f32 	%f140, %f1344;
	bra.uni 	$Lt_128_315394;
$Lt_128_5634:
	// Section for source file 22, line 486 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   B   = clamp(clamp(y, 0, 1) - clamp(x, 0, 1), 0, 1)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	.loc	22	486	0
	ld.param.f32 	%f1399, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1399, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1400, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1400;
	// %f1401 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1401, %f275;
	mov.f32 	%f1402, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1403, %f275, %f1402;
	mov.f32 	%f1404, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p103, %f1403, %f1404;
	@!%p103 bra 	$Lt_128_297218;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1405, 0f00000000;  	// 0
	mov.f32 	%f1406, 0f00000000;  	// 0
	mov.f32 	%f1407, 0f00000000;  	// 0
	mov.f32 	%f1401, 0f00000000;  	// 0
	bra.uni 	$Lt_128_296962;
$Lt_128_297218:
	// Shared factors used by all three channels.
	mov.f32 	%f1408, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1408, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1409, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1410, %f272, %f285;
	sub.ftz.f32 	%f289, %f1409, %f1410;
	// Channel pair (%f137, %f141): result in %f1407.
	mov.f32 	%f1411, 0f00000000;  	// 0
	max.ftz.f32 	%f1412, %f141, %f1411;
	mov.f32 	%f1413, 0f3f800000;  	// 1
	min.ftz.f32 	%f1414, %f1412, %f1413;
	mov.f32 	%f1415, 0f00000000;  	// 0
	max.ftz.f32 	%f1416, %f137, %f1415;
	mov.f32 	%f1417, 0f3f800000;  	// 1
	min.ftz.f32 	%f1418, %f1416, %f1417;
	sub.ftz.f32 	%f1419, %f1414, %f1418;
	mov.f32 	%f1420, 0f00000000;  	// 0
	max.ftz.f32 	%f1421, %f1419, %f1420;
	mov.f32 	%f1422, 0f3f800000;  	// 1
	min.ftz.f32 	%f1423, %f1421, %f1422;
	mul.ftz.f32 	%f1424, %f144, %f1423;
	fma.rn.ftz.f32 	%f1425, %f137, %f284, %f1424;
	mul.ftz.f32 	%f1426, %f286, %f1425;
	fma.rn.ftz.f32 	%f1407, %f141, %f289, %f1426;
	// Channel pair (%f138, %f142): same computation, result in %f1406.
	mov.f32 	%f1427, 0f00000000;  	// 0
	max.ftz.f32 	%f1428, %f142, %f1427;
	mov.f32 	%f1429, 0f3f800000;  	// 1
	min.ftz.f32 	%f1430, %f1428, %f1429;
	mov.f32 	%f1431, 0f00000000;  	// 0
	max.ftz.f32 	%f1432, %f138, %f1431;
	mov.f32 	%f1433, 0f3f800000;  	// 1
	min.ftz.f32 	%f1434, %f1432, %f1433;
	sub.ftz.f32 	%f1435, %f1430, %f1434;
	mov.f32 	%f1436, 0f00000000;  	// 0
	max.ftz.f32 	%f1437, %f1435, %f1436;
	mov.f32 	%f1438, 0f3f800000;  	// 1
	min.ftz.f32 	%f1439, %f1437, %f1438;
	mul.ftz.f32 	%f1440, %f144, %f1439;
	fma.rn.ftz.f32 	%f1441, %f138, %f284, %f1440;
	mul.ftz.f32 	%f1442, %f286, %f1441;
	fma.rn.ftz.f32 	%f1406, %f142, %f289, %f1442;
	// Channel pair (%f139, %f143): same computation, result in %f1405.
	mov.f32 	%f1443, 0f00000000;  	// 0
	max.ftz.f32 	%f1444, %f143, %f1443;
	mov.f32 	%f1445, 0f3f800000;  	// 1
	min.ftz.f32 	%f1446, %f1444, %f1445;
	mov.f32 	%f1447, 0f00000000;  	// 0
	max.ftz.f32 	%f1448, %f139, %f1447;
	mov.f32 	%f1449, 0f3f800000;  	// 1
	min.ftz.f32 	%f1450, %f1448, %f1449;
	sub.ftz.f32 	%f1451, %f1446, %f1450;
	mov.f32 	%f1452, 0f00000000;  	// 0
	max.ftz.f32 	%f1453, %f1451, %f1452;
	mov.f32 	%f1454, 0f3f800000;  	// 1
	min.ftz.f32 	%f1455, %f1453, %f1454;
	mul.ftz.f32 	%f1456, %f144, %f1455;
	fma.rn.ftz.f32 	%f1457, %f139, %f284, %f1456;
	mul.ftz.f32 	%f1458, %f286, %f1457;
	fma.rn.ftz.f32 	%f1405, %f143, %f289, %f1458;
$Lt_128_296962:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	211	0
	mov.f32 	%f137, %f1407;
	mov.f32 	%f138, %f1406;
	mov.f32 	%f139, %f1405;
	mov.f32 	%f140, %f1401;
	bra.uni 	$Lt_128_315394;
$Lt_128_5890:
	// Section for source file 22, line 487 (see .loc). Combines the value set
	// %f141..%f144 with %f137..%f140, stores the result back into %f137..%f140
	// and jumps to the shared exit label $Lt_128_315394.
	// NOTE(review): %f137-%f139 / %f141-%f143 look like colour channels and
	// %f140 / %f144 like the matching alphas -- confirm against the kernel prologue.
	//   a  = inAlphaGain * %f140            (%f272)
	//   ao = a + %f144 - a * %f144          (%f275)
	// When ao - 8e-6 <= 0 all four outputs become 0. Otherwise, for each channel
	// pair (x, y) = (%f137,%f141), (%f138,%f142), (%f139,%f143):
	//   B   = clamp(clamp(y, 0, 1) / min(max(x, 1e-7), 1), 0, 1)
	//   out = y * %f289 + %f286 * (x * %f284 + %f144 * B)
	// with %f284 = 1 - %f144, %f286 = a * rcp(ao), %f289 = 1 - a * rcp(ao).
	// The 1e-7 lower bound keeps the div.approx denominator above zero.
	.loc	22	487	0
	ld.param.f32 	%f1459, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1459, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1460, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1460;
	// %f1461 is the fourth output; it keeps ao unless the early-out below zeroes it.
	mov.f32 	%f1461, %f275;
	mov.f32 	%f1462, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1463, %f275, %f1462;
	mov.f32 	%f1464, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p104, %f1463, %f1464;
	@!%p104 bra 	$Lt_128_297730;
	// Early-out: ao is at or below the threshold, so skip the rcp and emit zeros.
	mov.f32 	%f1465, 0f00000000;  	// 0
	mov.f32 	%f1466, 0f00000000;  	// 0
	mov.f32 	%f1467, 0f00000000;  	// 0
	mov.f32 	%f1461, 0f00000000;  	// 0
	bra.uni 	$Lt_128_297474;
$Lt_128_297730:
	// Shared factors used by all three channels.
	mov.f32 	%f1468, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1468, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1469, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1470, %f272, %f285;
	sub.ftz.f32 	%f289, %f1469, %f1470;
	// Channel pair (%f137, %f141): result in %f1467.
	mov.f32 	%f1471, 0f00000000;  	// 0
	max.ftz.f32 	%f1472, %f141, %f1471;
	mov.f32 	%f1473, 0f3f800000;  	// 1
	min.ftz.f32 	%f1474, %f1472, %f1473;
	mov.f32 	%f1475, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1476, %f137, %f1475;
	mov.f32 	%f1477, 0f3f800000;  	// 1
	min.ftz.f32 	%f1478, %f1476, %f1477;
	div.approx.ftz.f32 	%f1479, %f1474, %f1478;
	mov.f32 	%f1480, 0f00000000;  	// 0
	max.ftz.f32 	%f1481, %f1479, %f1480;
	mov.f32 	%f1482, 0f3f800000;  	// 1
	min.ftz.f32 	%f1483, %f1481, %f1482;
	mul.ftz.f32 	%f1484, %f144, %f1483;
	fma.rn.ftz.f32 	%f1485, %f137, %f284, %f1484;
	mul.ftz.f32 	%f1486, %f286, %f1485;
	fma.rn.ftz.f32 	%f1467, %f141, %f289, %f1486;
	// Channel pair (%f138, %f142): same computation, result in %f1466.
	mov.f32 	%f1487, 0f00000000;  	// 0
	max.ftz.f32 	%f1488, %f142, %f1487;
	mov.f32 	%f1489, 0f3f800000;  	// 1
	min.ftz.f32 	%f1490, %f1488, %f1489;
	mov.f32 	%f1491, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1492, %f138, %f1491;
	mov.f32 	%f1493, 0f3f800000;  	// 1
	min.ftz.f32 	%f1494, %f1492, %f1493;
	div.approx.ftz.f32 	%f1495, %f1490, %f1494;
	mov.f32 	%f1496, 0f00000000;  	// 0
	max.ftz.f32 	%f1497, %f1495, %f1496;
	mov.f32 	%f1498, 0f3f800000;  	// 1
	min.ftz.f32 	%f1499, %f1497, %f1498;
	mul.ftz.f32 	%f1500, %f144, %f1499;
	fma.rn.ftz.f32 	%f1501, %f138, %f284, %f1500;
	mul.ftz.f32 	%f1502, %f286, %f1501;
	fma.rn.ftz.f32 	%f1466, %f142, %f289, %f1502;
	// Channel pair (%f139, %f143): same computation, result in %f1465.
	mov.f32 	%f1503, 0f00000000;  	// 0
	max.ftz.f32 	%f1504, %f143, %f1503;
	mov.f32 	%f1505, 0f3f800000;  	// 1
	min.ftz.f32 	%f1506, %f1504, %f1505;
	mov.f32 	%f1507, 0f33d6bf95;  	// 1e-007
	max.ftz.f32 	%f1508, %f139, %f1507;
	mov.f32 	%f1509, 0f3f800000;  	// 1
	min.ftz.f32 	%f1510, %f1508, %f1509;
	div.approx.ftz.f32 	%f1511, %f1506, %f1510;
	mov.f32 	%f1512, 0f00000000;  	// 0
	max.ftz.f32 	%f1513, %f1511, %f1512;
	mov.f32 	%f1514, 0f3f800000;  	// 1
	min.ftz.f32 	%f1515, %f1513, %f1514;
	mul.ftz.f32 	%f1516, %f144, %f1515;
	fma.rn.ftz.f32 	%f1517, %f139, %f284, %f1516;
	mul.ftz.f32 	%f1518, %f286, %f1517;
	fma.rn.ftz.f32 	%f1465, %f143, %f289, %f1518;
$Lt_128_297474:
	// Merge point of the early-out and the computed path: publish the results.
	.loc	6	212	0
	mov.f32 	%f137, %f1467;
	mov.f32 	%f138, %f1466;
	mov.f32 	%f139, %f1465;
	mov.f32 	%f140, %f1461;
	bra.uni 	$Lt_128_315394;
$Lt_128_6146:
	.loc	22	154	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	setp.eq.ftz.f32 	%p107, %f1522, %f139;
	@!%p107 bra 	$Lt_128_298242;
	setp.eq.ftz.f32 	%p108, %f1521, %f138;
	@!%p108 bra 	$Lt_128_298754;
	setp.lt.ftz.f32 	%p109, %f139, %f138;
	@!%p109 bra 	$Lt_128_299266;
	.loc	22	161	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1529, %f137, %f139;
	mul.ftz.f32 	%f1530, %f1528, %f1529;
	sub.ftz.f32 	%f1531, %f138, %f139;
	div.approx.ftz.f32 	%f1532, %f1530, %f1531;
	.loc	22	162	0
	mov.f32 	%f1533, %f1528;
	bra.uni 	$Lt_128_299522;
$Lt_128_299266:
	.loc	22	166	0
	mov.f32 	%f1532, 0f00000000;  	// 0
	mov.f32 	%f1533, 0f00000000;  	// 0
	bra.uni 	$Lt_128_299522;
$Lt_128_298754:
	setp.lt.ftz.f32 	%p112, %f139, %f137;
	@!%p112 bra 	$Lt_128_299778;
	.loc	22	173	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1534, %f138, %f139;
	mul.ftz.f32 	%f1535, %f1528, %f1534;
	sub.ftz.f32 	%f1536, %f137, %f139;
	div.approx.ftz.f32 	%f1533, %f1535, %f1536;
	.loc	22	174	0
	mov.f32 	%f1532, %f1528;
	bra.uni 	$Lt_128_299522;
$Lt_128_299778:
	.loc	22	178	0
	mov.f32 	%f1532, 0f00000000;  	// 0
	mov.f32 	%f1533, 0f00000000;  	// 0
$Lt_128_299522:
$Lt_128_298498:
	mov.f32 	%f1537, 0f00000000;  	// 0
	bra.uni 	$Lt_128_302082;
$Lt_128_298242:
	setp.eq.ftz.f32 	%p113, %f1522, %f138;
	setp.eq.ftz.f32 	%p114, %f1521, %f139;
	@!%p114 bra 	$Lt_128_300290;
	@!%p113 bra 	$Lt_128_300802;
	setp.gt.ftz.f32 	%p115, %f139, %f138;
	@!%p115 bra 	$Lt_128_301314;
	.loc	22	191	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1538, %f137, %f138;
	mul.ftz.f32 	%f1539, %f1528, %f1538;
	sub.ftz.f32 	%f1540, %f139, %f138;
	div.approx.ftz.f32 	%f1532, %f1539, %f1540;
	.loc	22	192	0
	mov.f32 	%f1537, %f1528;
	bra.uni 	$Lt_128_301058;
$Lt_128_301314:
	.loc	22	196	0
	mov.f32 	%f1532, 0f00000000;  	// 0
	mov.f32 	%f1537, 0f00000000;  	// 0
$Lt_128_301058:
	mov.f32 	%f1533, 0f00000000;  	// 0
	bra.uni 	$Lt_128_302082;
$Lt_128_300802:
	setp.gt.ftz.f32 	%p116, %f139, %f137;
	@!%p116 bra 	$Lt_128_301826;
	.loc	22	204	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1541, %f138, %f137;
	mul.ftz.f32 	%f1542, %f1528, %f1541;
	sub.ftz.f32 	%f1543, %f139, %f137;
	div.approx.ftz.f32 	%f1533, %f1542, %f1543;
	.loc	22	205	0
	mov.f32 	%f1537, %f1528;
	bra.uni 	$Lt_128_301570;
$Lt_128_301826:
	.loc	22	209	0
	mov.f32 	%f1537, 0f00000000;  	// 0
	mov.f32 	%f1533, 0f00000000;  	// 0
$Lt_128_301570:
	.loc	22	211	0
	mov.f32 	%f1532, 0f00000000;  	// 0
	bra.uni 	$Lt_128_302082;
$Lt_128_300290:
	@!%p113 bra 	$Lt_128_302338;
	setp.lt.ftz.f32 	%p117, %f138, %f137;
	@!%p117 bra 	$Lt_128_302850;
	.loc	22	220	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1544, %f139, %f138;
	mul.ftz.f32 	%f1545, %f1528, %f1544;
	sub.ftz.f32 	%f1546, %f137, %f138;
	div.approx.ftz.f32 	%f1537, %f1545, %f1546;
	.loc	22	221	0
	mov.f32 	%f1532, %f1528;
	bra.uni 	$Lt_128_302594;
$Lt_128_302850:
	.loc	22	225	0
	mov.f32 	%f1532, 0f00000000;  	// 0
	mov.f32 	%f1537, 0f00000000;  	// 0
$Lt_128_302594:
	mov.f32 	%f1533, 0f00000000;  	// 0
	bra.uni 	$Lt_128_302082;
$Lt_128_302338:
	@!%p105 bra 	$Lt_128_303362;
	.loc	22	233	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	sub.ftz.f32 	%f1527, %f1524, %f1526;
	cvt.ftz.sat.f32.f32 	%f1528, %f1527;
	sub.ftz.f32 	%f1547, %f139, %f137;
	mul.ftz.f32 	%f1548, %f1528, %f1547;
	sub.ftz.f32 	%f1549, %f138, %f137;
	div.approx.ftz.f32 	%f1537, %f1548, %f1549;
	.loc	22	234	0
	mov.f32 	%f1533, %f1528;
	bra.uni 	$Lt_128_303106;
$Lt_128_303362:
	.loc	22	238	0
	mov.f32 	%f1537, 0f00000000;  	// 0
	mov.f32 	%f1533, 0f00000000;  	// 0
$Lt_128_303106:
	.loc	22	240	0
	mov.f32 	%f1532, 0f00000000;  	// 0
$Lt_128_302082:
$Lt_128_300034:
$Lt_128_297986:
	.loc	22	113	0
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1550, %f142, %f608;
	mul.ftz.f32 	%f1551, %f1533, %f608;
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1552, %f607, %f143, %f1550;
	fma.rn.ftz.f32 	%f1553, %f607, %f1537, %f1551;
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1554, %f606, %f141, %f1552;
	fma.rn.ftz.f32 	%f1555, %f606, %f1532, %f1553;
	cvt.ftz.sat.f32.f32 	%f1556, %f1554;
	cvt.ftz.sat.f32.f32 	%f1557, %f1555;
	sub.ftz.f32 	%f1558, %f1556, %f1557;
	add.ftz.f32 	%f1559, %f1558, %f1532;
	mov.f32 	%f1560, %f1559;
	add.ftz.f32 	%f1561, %f1558, %f1533;
	mov.f32 	%f1562, %f1561;
	add.ftz.f32 	%f1563, %f1558, %f1537;
	mov.f32 	%f1564, %f1563;
	.loc	22	50	0
	mul.ftz.f32 	%f1565, %f1561, %f608;
	fma.rn.ftz.f32 	%f1566, %f607, %f1563, %f1565;
	fma.rn.ftz.f32 	%f1567, %f606, %f1559, %f1566;
	cvt.ftz.sat.f32.f32 	%f1568, %f1567;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p118, %f1561, %f1559;
	selp.f32 	%f1569, %f1559, %f1561, %p118;
	setp.lt.ftz.f32 	%p119, %f1569, %f1563;
	selp.f32 	%f1570, %f1569, %f1563, %p119;
	mov.f32 	%f1571, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p120, %f1570, %f1571;
	@!%p120 bra 	$Lt_128_303618;
	.loc	22	119	0
	sub.ftz.f32 	%f1572, %f1568, %f1570;
	sub.ftz.f32 	%f1573, %f1563, %f1568;
	mul.ftz.f32 	%f1574, %f1568, %f1573;
	div.approx.ftz.f32 	%f1575, %f1574, %f1572;
	add.ftz.f32 	%f1564, %f1568, %f1575;
	.loc	22	120	0
	sub.ftz.f32 	%f1576, %f1561, %f1568;
	mul.ftz.f32 	%f1577, %f1568, %f1576;
	div.approx.ftz.f32 	%f1578, %f1577, %f1572;
	add.ftz.f32 	%f1562, %f1568, %f1578;
	.loc	22	121	0
	sub.ftz.f32 	%f1579, %f1559, %f1568;
	mul.ftz.f32 	%f1580, %f1568, %f1579;
	div.approx.ftz.f32 	%f1581, %f1580, %f1572;
	add.ftz.f32 	%f1560, %f1568, %f1581;
$Lt_128_303618:
	max.ftz.f32 	%f1582, %f1561, %f1559;
	max.ftz.f32 	%f1583, %f1582, %f1563;
	mov.f32 	%f1584, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p121, %f1583, %f1584;
	@!%p121 bra 	$Lt_128_304130;
	.loc	27	529	0
	mov.f32 	%f1585, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1586, %f1585, %f1568;
	sub.ftz.f32 	%f1587, %f1583, %f1568;
	sub.ftz.f32 	%f1588, %f1564, %f1568;
	mul.ftz.f32 	%f1589, %f1586, %f1588;
	div.approx.ftz.f32 	%f1590, %f1589, %f1587;
	.loc	22	125	0
	add.ftz.f32 	%f1564, %f1590, %f1568;
	.loc	27	529	0
	sub.ftz.f32 	%f1591, %f1562, %f1568;
	mul.ftz.f32 	%f1592, %f1586, %f1591;
	div.approx.ftz.f32 	%f1593, %f1592, %f1587;
	.loc	22	126	0
	add.ftz.f32 	%f1562, %f1593, %f1568;
	.loc	27	529	0
	sub.ftz.f32 	%f1594, %f1560, %f1568;
	mul.ftz.f32 	%f1595, %f1586, %f1594;
	div.approx.ftz.f32 	%f1596, %f1595, %f1587;
	.loc	22	127	0
	add.ftz.f32 	%f1560, %f1596, %f1568;
$Lt_128_304130:
	.loc	22	468	0
	ld.param.f32 	%f1597, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1597, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1598, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1598;
	mov.f32 	%f1599, %f275;
	mov.f32 	%f1600, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1601, %f275, %f1600;
	mov.f32 	%f1602, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p122, %f1601, %f1602;
	@!%p122 bra 	$Lt_128_304898;
	mov.f32 	%f1603, 0f00000000;  	// 0
	mov.f32 	%f1604, 0f00000000;  	// 0
	mov.f32 	%f1605, 0f00000000;  	// 0
	mov.f32 	%f1599, 0f00000000;  	// 0
	bra.uni 	$Lt_128_304642;
$Lt_128_304898:
	mov.f32 	%f1606, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1606, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1607, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1608, %f272, %f285;
	sub.ftz.f32 	%f289, %f1607, %f1608;
	mul.ftz.f32 	%f1609, %f284, %f1560;
	fma.rn.ftz.f32 	%f1610, %f1560, %f144, %f1609;
	mul.ftz.f32 	%f1611, %f286, %f1610;
	fma.rn.ftz.f32 	%f1605, %f141, %f289, %f1611;
	mul.ftz.f32 	%f1612, %f284, %f1562;
	fma.rn.ftz.f32 	%f1613, %f1562, %f144, %f1612;
	mul.ftz.f32 	%f1614, %f286, %f1613;
	fma.rn.ftz.f32 	%f1604, %f142, %f289, %f1614;
	mul.ftz.f32 	%f1615, %f284, %f1564;
	fma.rn.ftz.f32 	%f1616, %f1564, %f144, %f1615;
	mul.ftz.f32 	%f1617, %f286, %f1616;
	fma.rn.ftz.f32 	%f1603, %f143, %f289, %f1617;
$Lt_128_304642:
	.loc	6	213	0
	mov.f32 	%f137, %f1605;
	mov.f32 	%f138, %f1604;
	mov.f32 	%f139, %f1603;
	mov.f32 	%f140, %f1599;
	bra.uni 	$Lt_128_315394;
$Lt_128_6402:
	.loc	22	154	0
	max.ftz.f32 	%f1523, %f141, %f142;
	setp.lt.ftz.f32 	%p110, %f141, %f142;
	max.ftz.f32 	%f1524, %f1523, %f143;
	selp.f32 	%f1525, %f141, %f142, %p110;
	setp.lt.ftz.f32 	%p111, %f1525, %f143;
	selp.f32 	%f1526, %f1525, %f143, %p111;
	setp.eq.ftz.f32 	%p123, %f1526, %f143;
	@!%p123 bra 	$Lt_128_305410;
	setp.eq.ftz.f32 	%p124, %f1524, %f142;
	@!%p124 bra 	$Lt_128_305922;
	setp.gt.ftz.f32 	%p125, %f142, %f143;
	@!%p125 bra 	$Lt_128_306434;
	.loc	22	161	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1620, %f141, %f143;
	mul.ftz.f32 	%f1621, %f1619, %f1620;
	sub.ftz.f32 	%f1622, %f142, %f143;
	div.approx.ftz.f32 	%f1623, %f1621, %f1622;
	.loc	22	162	0
	mov.f32 	%f1624, %f1619;
	bra.uni 	$Lt_128_306690;
$Lt_128_306434:
	.loc	22	166	0
	mov.f32 	%f1623, 0f00000000;  	// 0
	mov.f32 	%f1624, 0f00000000;  	// 0
	bra.uni 	$Lt_128_306690;
$Lt_128_305922:
	setp.gt.ftz.f32 	%p126, %f141, %f143;
	@!%p126 bra 	$Lt_128_306946;
	.loc	22	173	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1625, %f142, %f143;
	mul.ftz.f32 	%f1626, %f1619, %f1625;
	sub.ftz.f32 	%f1627, %f141, %f143;
	div.approx.ftz.f32 	%f1624, %f1626, %f1627;
	.loc	22	174	0
	mov.f32 	%f1623, %f1619;
	bra.uni 	$Lt_128_306690;
$Lt_128_306946:
	.loc	22	178	0
	mov.f32 	%f1623, 0f00000000;  	// 0
	mov.f32 	%f1624, 0f00000000;  	// 0
$Lt_128_306690:
$Lt_128_305666:
	mov.f32 	%f1628, 0f00000000;  	// 0
	bra.uni 	$Lt_128_309250;
$Lt_128_305410:
	setp.eq.ftz.f32 	%p127, %f1526, %f142;
	setp.eq.ftz.f32 	%p128, %f1524, %f143;
	@!%p128 bra 	$Lt_128_307458;
	@!%p127 bra 	$Lt_128_307970;
	setp.lt.ftz.f32 	%p129, %f142, %f143;
	@!%p129 bra 	$Lt_128_308482;
	.loc	22	191	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1629, %f141, %f142;
	mul.ftz.f32 	%f1630, %f1619, %f1629;
	sub.ftz.f32 	%f1631, %f143, %f142;
	div.approx.ftz.f32 	%f1623, %f1630, %f1631;
	.loc	22	192	0
	mov.f32 	%f1628, %f1619;
	bra.uni 	$Lt_128_308226;
$Lt_128_308482:
	.loc	22	196	0
	mov.f32 	%f1623, 0f00000000;  	// 0
	mov.f32 	%f1628, 0f00000000;  	// 0
$Lt_128_308226:
	mov.f32 	%f1624, 0f00000000;  	// 0
	bra.uni 	$Lt_128_309250;
$Lt_128_307970:
	setp.lt.ftz.f32 	%p130, %f141, %f143;
	@!%p130 bra 	$Lt_128_308994;
	.loc	22	204	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1632, %f142, %f141;
	mul.ftz.f32 	%f1633, %f1619, %f1632;
	sub.ftz.f32 	%f1634, %f143, %f141;
	div.approx.ftz.f32 	%f1624, %f1633, %f1634;
	.loc	22	205	0
	mov.f32 	%f1628, %f1619;
	bra.uni 	$Lt_128_308738;
$Lt_128_308994:
	.loc	22	209	0
	mov.f32 	%f1628, 0f00000000;  	// 0
	mov.f32 	%f1624, 0f00000000;  	// 0
$Lt_128_308738:
	.loc	22	211	0
	mov.f32 	%f1623, 0f00000000;  	// 0
	bra.uni 	$Lt_128_309250;
$Lt_128_307458:
	@!%p127 bra 	$Lt_128_309506;
	setp.gt.ftz.f32 	%p131, %f141, %f142;
	@!%p131 bra 	$Lt_128_310018;
	.loc	22	220	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1635, %f143, %f142;
	mul.ftz.f32 	%f1636, %f1619, %f1635;
	sub.ftz.f32 	%f1637, %f141, %f142;
	div.approx.ftz.f32 	%f1628, %f1636, %f1637;
	.loc	22	221	0
	mov.f32 	%f1623, %f1619;
	bra.uni 	$Lt_128_309762;
$Lt_128_310018:
	.loc	22	225	0
	mov.f32 	%f1623, 0f00000000;  	// 0
	mov.f32 	%f1628, 0f00000000;  	// 0
$Lt_128_309762:
	mov.f32 	%f1624, 0f00000000;  	// 0
	bra.uni 	$Lt_128_309250;
$Lt_128_309506:
	@!%p110 bra 	$Lt_128_310530;
	.loc	22	233	0
	setp.gt.ftz.f32 	%p105, %f138, %f137;
	max.ftz.f32 	%f1519, %f138, %f137;
	selp.f32 	%f1520, %f137, %f138, %p105;
	max.ftz.f32 	%f1521, %f1519, %f139;
	setp.lt.ftz.f32 	%p106, %f1520, %f139;
	selp.f32 	%f1522, %f1520, %f139, %p106;
	sub.ftz.f32 	%f1618, %f1521, %f1522;
	cvt.ftz.sat.f32.f32 	%f1619, %f1618;
	sub.ftz.f32 	%f1638, %f143, %f141;
	mul.ftz.f32 	%f1639, %f1619, %f1638;
	sub.ftz.f32 	%f1640, %f142, %f141;
	div.approx.ftz.f32 	%f1628, %f1639, %f1640;
	.loc	22	234	0
	mov.f32 	%f1624, %f1619;
	bra.uni 	$Lt_128_310274;
$Lt_128_310530:
	.loc	22	238	0
	mov.f32 	%f1628, 0f00000000;  	// 0
	mov.f32 	%f1624, 0f00000000;  	// 0
$Lt_128_310274:
	.loc	22	240	0
	mov.f32 	%f1623, 0f00000000;  	// 0
$Lt_128_309250:
$Lt_128_307202:
$Lt_128_305154:
	.loc	22	113	0
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1641, %f142, %f608;
	mul.ftz.f32 	%f1642, %f1624, %f608;
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1643, %f607, %f143, %f1641;
	fma.rn.ftz.f32 	%f1644, %f607, %f1628, %f1642;
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1645, %f606, %f141, %f1643;
	fma.rn.ftz.f32 	%f1646, %f606, %f1623, %f1644;
	cvt.ftz.sat.f32.f32 	%f1647, %f1645;
	cvt.ftz.sat.f32.f32 	%f1648, %f1646;
	sub.ftz.f32 	%f1649, %f1647, %f1648;
	add.ftz.f32 	%f1650, %f1649, %f1623;
	mov.f32 	%f1651, %f1650;
	add.ftz.f32 	%f1652, %f1649, %f1624;
	mov.f32 	%f1653, %f1652;
	add.ftz.f32 	%f1654, %f1649, %f1628;
	mov.f32 	%f1655, %f1654;
	.loc	22	50	0
	mul.ftz.f32 	%f1656, %f1652, %f608;
	fma.rn.ftz.f32 	%f1657, %f607, %f1654, %f1656;
	fma.rn.ftz.f32 	%f1658, %f606, %f1650, %f1657;
	cvt.ftz.sat.f32.f32 	%f1659, %f1658;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p132, %f1652, %f1650;
	selp.f32 	%f1660, %f1650, %f1652, %p132;
	setp.lt.ftz.f32 	%p133, %f1660, %f1654;
	selp.f32 	%f1661, %f1660, %f1654, %p133;
	mov.f32 	%f1662, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p134, %f1661, %f1662;
	@!%p134 bra 	$Lt_128_310786;
	.loc	22	119	0
	sub.ftz.f32 	%f1663, %f1659, %f1661;
	sub.ftz.f32 	%f1664, %f1654, %f1659;
	mul.ftz.f32 	%f1665, %f1659, %f1664;
	div.approx.ftz.f32 	%f1666, %f1665, %f1663;
	add.ftz.f32 	%f1655, %f1659, %f1666;
	.loc	22	120	0
	sub.ftz.f32 	%f1667, %f1652, %f1659;
	mul.ftz.f32 	%f1668, %f1659, %f1667;
	div.approx.ftz.f32 	%f1669, %f1668, %f1663;
	add.ftz.f32 	%f1653, %f1659, %f1669;
	.loc	22	121	0
	sub.ftz.f32 	%f1670, %f1650, %f1659;
	mul.ftz.f32 	%f1671, %f1659, %f1670;
	div.approx.ftz.f32 	%f1672, %f1671, %f1663;
	add.ftz.f32 	%f1651, %f1659, %f1672;
$Lt_128_310786:
	max.ftz.f32 	%f1673, %f1652, %f1650;
	max.ftz.f32 	%f1674, %f1673, %f1654;
	mov.f32 	%f1675, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p135, %f1674, %f1675;
	@!%p135 bra 	$Lt_128_311298;
	.loc	27	529	0
	mov.f32 	%f1676, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1677, %f1676, %f1659;
	sub.ftz.f32 	%f1678, %f1674, %f1659;
	sub.ftz.f32 	%f1679, %f1655, %f1659;
	mul.ftz.f32 	%f1680, %f1677, %f1679;
	div.approx.ftz.f32 	%f1681, %f1680, %f1678;
	.loc	22	125	0
	add.ftz.f32 	%f1655, %f1681, %f1659;
	.loc	27	529	0
	sub.ftz.f32 	%f1682, %f1653, %f1659;
	mul.ftz.f32 	%f1683, %f1677, %f1682;
	div.approx.ftz.f32 	%f1684, %f1683, %f1678;
	.loc	22	126	0
	add.ftz.f32 	%f1653, %f1684, %f1659;
	.loc	27	529	0
	sub.ftz.f32 	%f1685, %f1651, %f1659;
	mul.ftz.f32 	%f1686, %f1677, %f1685;
	div.approx.ftz.f32 	%f1687, %f1686, %f1678;
	.loc	22	127	0
	add.ftz.f32 	%f1651, %f1687, %f1659;
$Lt_128_311298:
	.loc	22	468	0
	ld.param.f32 	%f1688, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1688, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1689, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1689;
	mov.f32 	%f1690, %f275;
	mov.f32 	%f1691, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1692, %f275, %f1691;
	mov.f32 	%f1693, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p136, %f1692, %f1693;
	@!%p136 bra 	$Lt_128_312066;
	mov.f32 	%f1694, 0f00000000;  	// 0
	mov.f32 	%f1695, 0f00000000;  	// 0
	mov.f32 	%f1696, 0f00000000;  	// 0
	mov.f32 	%f1690, 0f00000000;  	// 0
	bra.uni 	$Lt_128_311810;
$Lt_128_312066:
	mov.f32 	%f1697, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1697, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1698, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1699, %f272, %f285;
	sub.ftz.f32 	%f289, %f1698, %f1699;
	mul.ftz.f32 	%f1700, %f284, %f1651;
	fma.rn.ftz.f32 	%f1701, %f1651, %f144, %f1700;
	mul.ftz.f32 	%f1702, %f286, %f1701;
	fma.rn.ftz.f32 	%f1696, %f141, %f289, %f1702;
	mul.ftz.f32 	%f1703, %f284, %f1653;
	fma.rn.ftz.f32 	%f1704, %f1653, %f144, %f1703;
	mul.ftz.f32 	%f1705, %f286, %f1704;
	fma.rn.ftz.f32 	%f1695, %f142, %f289, %f1705;
	mul.ftz.f32 	%f1706, %f284, %f1655;
	fma.rn.ftz.f32 	%f1707, %f1655, %f144, %f1706;
	mul.ftz.f32 	%f1708, %f286, %f1707;
	fma.rn.ftz.f32 	%f1694, %f143, %f289, %f1708;
$Lt_128_311810:
	.loc	6	214	0
	mov.f32 	%f137, %f1696;
	mov.f32 	%f138, %f1695;
	mov.f32 	%f139, %f1694;
	mov.f32 	%f140, %f1690;
	bra.uni 	$Lt_128_315394;
$Lt_128_6658:
	.loc	22	113	0
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1709, %f608, %f138;
	mul.ftz.f32 	%f1710, %f142, %f608;
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1711, %f607, %f139, %f1709;
	fma.rn.ftz.f32 	%f1712, %f607, %f143, %f1710;
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1713, %f606, %f137, %f1711;
	fma.rn.ftz.f32 	%f1714, %f606, %f141, %f1712;
	cvt.ftz.sat.f32.f32 	%f1715, %f1713;
	cvt.ftz.sat.f32.f32 	%f1716, %f1714;
	sub.ftz.f32 	%f1717, %f1716, %f1715;
	add.ftz.f32 	%f1718, %f1717, %f137;
	mov.f32 	%f1719, %f1718;
	add.ftz.f32 	%f1720, %f1717, %f138;
	mov.f32 	%f1721, %f1720;
	add.ftz.f32 	%f1722, %f1717, %f139;
	mov.f32 	%f1723, %f1722;
	.loc	22	50	0
	mul.ftz.f32 	%f1724, %f1720, %f608;
	fma.rn.ftz.f32 	%f1725, %f607, %f1722, %f1724;
	fma.rn.ftz.f32 	%f1726, %f606, %f1718, %f1725;
	cvt.ftz.sat.f32.f32 	%f1727, %f1726;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p137, %f1720, %f1718;
	selp.f32 	%f1728, %f1718, %f1720, %p137;
	setp.lt.ftz.f32 	%p138, %f1728, %f1722;
	selp.f32 	%f1729, %f1728, %f1722, %p138;
	mov.f32 	%f1730, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p139, %f1729, %f1730;
	@!%p139 bra 	$Lt_128_312322;
	.loc	22	119	0
	sub.ftz.f32 	%f1731, %f1727, %f1729;
	sub.ftz.f32 	%f1732, %f1722, %f1727;
	mul.ftz.f32 	%f1733, %f1727, %f1732;
	div.approx.ftz.f32 	%f1734, %f1733, %f1731;
	add.ftz.f32 	%f1723, %f1727, %f1734;
	.loc	22	120	0
	sub.ftz.f32 	%f1735, %f1720, %f1727;
	mul.ftz.f32 	%f1736, %f1727, %f1735;
	div.approx.ftz.f32 	%f1737, %f1736, %f1731;
	add.ftz.f32 	%f1721, %f1727, %f1737;
	.loc	22	121	0
	sub.ftz.f32 	%f1738, %f1718, %f1727;
	mul.ftz.f32 	%f1739, %f1727, %f1738;
	div.approx.ftz.f32 	%f1740, %f1739, %f1731;
	add.ftz.f32 	%f1719, %f1727, %f1740;
$Lt_128_312322:
	max.ftz.f32 	%f1741, %f1720, %f1718;
	max.ftz.f32 	%f1742, %f1741, %f1722;
	mov.f32 	%f1743, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p140, %f1742, %f1743;
	@!%p140 bra 	$Lt_128_312834;
	.loc	27	529	0
	mov.f32 	%f1744, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1745, %f1744, %f1727;
	sub.ftz.f32 	%f1746, %f1742, %f1727;
	sub.ftz.f32 	%f1747, %f1723, %f1727;
	mul.ftz.f32 	%f1748, %f1745, %f1747;
	div.approx.ftz.f32 	%f1749, %f1748, %f1746;
	.loc	22	125	0
	add.ftz.f32 	%f1723, %f1749, %f1727;
	.loc	27	529	0
	sub.ftz.f32 	%f1750, %f1721, %f1727;
	mul.ftz.f32 	%f1751, %f1745, %f1750;
	div.approx.ftz.f32 	%f1752, %f1751, %f1746;
	.loc	22	126	0
	add.ftz.f32 	%f1721, %f1752, %f1727;
	.loc	27	529	0
	sub.ftz.f32 	%f1753, %f1719, %f1727;
	mul.ftz.f32 	%f1754, %f1745, %f1753;
	div.approx.ftz.f32 	%f1755, %f1754, %f1746;
	.loc	22	127	0
	add.ftz.f32 	%f1719, %f1755, %f1727;
$Lt_128_312834:
	.loc	22	468	0
	ld.param.f32 	%f1756, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1756, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1757, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1757;
	mov.f32 	%f1758, %f275;
	mov.f32 	%f1759, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1760, %f275, %f1759;
	mov.f32 	%f1761, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p141, %f1760, %f1761;
	@!%p141 bra 	$Lt_128_313602;
	mov.f32 	%f1762, 0f00000000;  	// 0
	mov.f32 	%f1763, 0f00000000;  	// 0
	mov.f32 	%f1764, 0f00000000;  	// 0
	mov.f32 	%f1758, 0f00000000;  	// 0
	bra.uni 	$Lt_128_313346;
$Lt_128_313602:
	mov.f32 	%f1765, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1765, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1766, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1767, %f272, %f285;
	sub.ftz.f32 	%f289, %f1766, %f1767;
	mul.ftz.f32 	%f1768, %f284, %f1719;
	fma.rn.ftz.f32 	%f1769, %f1719, %f144, %f1768;
	mul.ftz.f32 	%f1770, %f286, %f1769;
	fma.rn.ftz.f32 	%f1764, %f141, %f289, %f1770;
	mul.ftz.f32 	%f1771, %f284, %f1721;
	fma.rn.ftz.f32 	%f1772, %f1721, %f144, %f1771;
	mul.ftz.f32 	%f1773, %f286, %f1772;
	fma.rn.ftz.f32 	%f1763, %f142, %f289, %f1773;
	mul.ftz.f32 	%f1774, %f284, %f1723;
	fma.rn.ftz.f32 	%f1775, %f1723, %f144, %f1774;
	mul.ftz.f32 	%f1776, %f286, %f1775;
	fma.rn.ftz.f32 	%f1762, %f143, %f289, %f1776;
$Lt_128_313346:
	.loc	6	215	0
	mov.f32 	%f137, %f1764;
	mov.f32 	%f138, %f1763;
	mov.f32 	%f139, %f1762;
	mov.f32 	%f140, %f1758;
	bra.uni 	$Lt_128_315394;
$Lt_128_6914:
	.loc	22	113	0
	ld.const.f32 	%f608, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f1777, %f608, %f138;
	mul.ftz.f32 	%f1778, %f142, %f608;
	ld.const.f32 	%f607, [kRGB32f_To_601YPbPr+0];
	fma.rn.ftz.f32 	%f1779, %f607, %f139, %f1777;
	fma.rn.ftz.f32 	%f1780, %f607, %f143, %f1778;
	ld.const.f32 	%f606, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f1781, %f606, %f137, %f1779;
	fma.rn.ftz.f32 	%f1782, %f606, %f141, %f1780;
	cvt.ftz.sat.f32.f32 	%f1783, %f1781;
	cvt.ftz.sat.f32.f32 	%f1784, %f1782;
	sub.ftz.f32 	%f1785, %f1783, %f1784;
	add.ftz.f32 	%f1786, %f1785, %f141;
	mov.f32 	%f1787, %f1786;
	add.ftz.f32 	%f1788, %f1785, %f142;
	mov.f32 	%f1789, %f1788;
	add.ftz.f32 	%f1790, %f1785, %f143;
	mov.f32 	%f1791, %f1790;
	.loc	22	50	0
	mul.ftz.f32 	%f1792, %f1788, %f608;
	fma.rn.ftz.f32 	%f1793, %f607, %f1790, %f1792;
	fma.rn.ftz.f32 	%f1794, %f606, %f1786, %f1793;
	cvt.ftz.sat.f32.f32 	%f1795, %f1794;
	.loc	22	116	0
	setp.gt.ftz.f32 	%p142, %f1788, %f1786;
	selp.f32 	%f1796, %f1786, %f1788, %p142;
	setp.lt.ftz.f32 	%p143, %f1796, %f1790;
	selp.f32 	%f1797, %f1796, %f1790, %p143;
	mov.f32 	%f1798, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p144, %f1797, %f1798;
	@!%p144 bra 	$Lt_128_313858;
	.loc	22	119	0
	sub.ftz.f32 	%f1799, %f1795, %f1797;
	sub.ftz.f32 	%f1800, %f1790, %f1795;
	mul.ftz.f32 	%f1801, %f1795, %f1800;
	div.approx.ftz.f32 	%f1802, %f1801, %f1799;
	add.ftz.f32 	%f1791, %f1795, %f1802;
	.loc	22	120	0
	sub.ftz.f32 	%f1803, %f1788, %f1795;
	mul.ftz.f32 	%f1804, %f1795, %f1803;
	div.approx.ftz.f32 	%f1805, %f1804, %f1799;
	add.ftz.f32 	%f1789, %f1795, %f1805;
	.loc	22	121	0
	sub.ftz.f32 	%f1806, %f1786, %f1795;
	mul.ftz.f32 	%f1807, %f1795, %f1806;
	div.approx.ftz.f32 	%f1808, %f1807, %f1799;
	add.ftz.f32 	%f1787, %f1795, %f1808;
$Lt_128_313858:
	max.ftz.f32 	%f1809, %f1788, %f1786;
	max.ftz.f32 	%f1810, %f1809, %f1790;
	mov.f32 	%f1811, 0f3f800000;  	// 1
	setp.gt.ftz.f32 	%p145, %f1810, %f1811;
	@!%p145 bra 	$Lt_128_314370;
	.loc	27	529	0
	mov.f32 	%f1812, 0f3f800000;  	// 1
	sub.ftz.f32 	%f1813, %f1812, %f1795;
	sub.ftz.f32 	%f1814, %f1810, %f1795;
	sub.ftz.f32 	%f1815, %f1791, %f1795;
	mul.ftz.f32 	%f1816, %f1813, %f1815;
	div.approx.ftz.f32 	%f1817, %f1816, %f1814;
	.loc	22	125	0
	add.ftz.f32 	%f1791, %f1817, %f1795;
	.loc	27	529	0
	sub.ftz.f32 	%f1818, %f1789, %f1795;
	mul.ftz.f32 	%f1819, %f1813, %f1818;
	div.approx.ftz.f32 	%f1820, %f1819, %f1814;
	.loc	22	126	0
	add.ftz.f32 	%f1789, %f1820, %f1795;
	.loc	27	529	0
	sub.ftz.f32 	%f1821, %f1787, %f1795;
	mul.ftz.f32 	%f1822, %f1813, %f1821;
	div.approx.ftz.f32 	%f1823, %f1822, %f1814;
	.loc	22	127	0
	add.ftz.f32 	%f1787, %f1823, %f1795;
$Lt_128_314370:
	.loc	22	468	0
	ld.param.f32 	%f1824, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f272, %f1824, %f140;
	add.ftz.f32 	%f273, %f272, %f144;
	mul.ftz.f32 	%f1825, %f272, %f144;
	sub.ftz.f32 	%f275, %f273, %f1825;
	mov.f32 	%f1826, %f275;
	mov.f32 	%f1827, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1828, %f275, %f1827;
	mov.f32 	%f1829, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p146, %f1828, %f1829;
	@!%p146 bra 	$Lt_128_315138;
	mov.f32 	%f1830, 0f00000000;  	// 0
	mov.f32 	%f1831, 0f00000000;  	// 0
	mov.f32 	%f1832, 0f00000000;  	// 0
	mov.f32 	%f1826, 0f00000000;  	// 0
	bra.uni 	$Lt_128_314882;
$Lt_128_315138:
	mov.f32 	%f1833, 0f3f800000;  	// 1
	sub.ftz.f32 	%f284, %f1833, %f144;
	rcp.approx.ftz.f32 	%f285, %f275;
	mul.ftz.f32 	%f286, %f285, %f272;
	mov.f32 	%f1834, 0f3f800000;  	// 1
	mul.ftz.f32 	%f1835, %f272, %f285;
	sub.ftz.f32 	%f289, %f1834, %f1835;
	mul.ftz.f32 	%f1836, %f284, %f1787;
	fma.rn.ftz.f32 	%f1837, %f1787, %f144, %f1836;
	mul.ftz.f32 	%f1838, %f286, %f1837;
	fma.rn.ftz.f32 	%f1832, %f141, %f289, %f1838;
	mul.ftz.f32 	%f1839, %f284, %f1789;
	fma.rn.ftz.f32 	%f1840, %f1789, %f144, %f1839;
	mul.ftz.f32 	%f1841, %f286, %f1840;
	fma.rn.ftz.f32 	%f1831, %f142, %f289, %f1841;
	mul.ftz.f32 	%f1842, %f284, %f1791;
	fma.rn.ftz.f32 	%f1843, %f1791, %f144, %f1842;
	mul.ftz.f32 	%f1844, %f286, %f1843;
	fma.rn.ftz.f32 	%f1830, %f143, %f289, %f1844;
$Lt_128_314882:
	.loc	6	216	0
	mov.f32 	%f137, %f1832;
	mov.f32 	%f138, %f1831;
	mov.f32 	%f139, %f1830;
	mov.f32 	%f140, %f1826;
	bra.uni 	$Lt_128_315394;
$Lt_128_270594:
	.loc	6	218	0
	@!%p7 bra 	$Lt_128_315394;
	ld.param.f32 	%f1845, [__cudaparm_HorizontalOnlyScaleKernel_inAlphaGain];
	mul.ftz.f32 	%f1846, %f1845, %f140;
	mov.f32 	%f1847, 0fb70637bd;  	// -8e-006
	add.ftz.f32 	%f1848, %f1846, %f1847;
	mov.f32 	%f1849, 0f00000000;  	// 0
	setp.le.ftz.f32 	%p147, %f1848, %f1849;
	@!%p147 bra 	$Lt_128_316162;
	mov.f32 	%f139, 0f00000000;   	// 0
	mov.f32 	%f138, 0f00000000;   	// 0
	mov.f32 	%f137, 0f00000000;   	// 0
	bra.uni 	$Lt_128_315906;
$Lt_128_316162:
	.loc	6	238	0
	rcp.approx.ftz.f32 	%f1850, %f140;
	mul.ftz.f32 	%f1851, %f1850, %f137;
	mov.f32 	%f1852, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p148, %f1851, %f1852;
	@!%p148 bra 	$Lt_128_316418;
	.loc	5	242	0
	neg.ftz.f32 	%f1853, %f1851;
	lg2.approx.ftz.f32 	%f1854, %f1853;
	mov.f32 	%f1855, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1856, %f1854, %f1855;
	ex2.approx.ftz.f32 	%f1857, %f1856;
	neg.ftz.f32 	%f1858, %f1857;
	bra.uni 	$LDWendi___log2f_305_34;
$Lt_128_316418:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1859, %f1851;
	mov.f32 	%f1860, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1861, %f1859, %f1860;
	ex2.approx.ftz.f32 	%f1858, %f1861;
$LDWendi___log2f_305_34:
	.loc	6	238	0
	mov.f32 	%f137, %f1858;
	.loc	6	239	0
	mul.ftz.f32 	%f1862, %f1850, %f138;
	mov.f32 	%f1863, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p149, %f1862, %f1863;
	@!%p149 bra 	$Lt_128_316930;
	.loc	5	242	0
	neg.ftz.f32 	%f1864, %f1862;
	lg2.approx.ftz.f32 	%f1865, %f1864;
	mov.f32 	%f1866, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1867, %f1865, %f1866;
	ex2.approx.ftz.f32 	%f1868, %f1867;
	neg.ftz.f32 	%f1869, %f1868;
	bra.uni 	$LDWendi___log2f_305_32;
$Lt_128_316930:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1870, %f1862;
	mov.f32 	%f1871, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1872, %f1870, %f1871;
	ex2.approx.ftz.f32 	%f1869, %f1872;
$LDWendi___log2f_305_32:
	.loc	6	239	0
	mov.f32 	%f138, %f1869;
	.loc	6	240	0
	mul.ftz.f32 	%f1873, %f1850, %f139;
	mov.f32 	%f1874, 0f00000000;  	// 0
	setp.lt.ftz.f32 	%p150, %f1873, %f1874;
	@!%p150 bra 	$Lt_128_317442;
	.loc	5	242	0
	neg.ftz.f32 	%f1875, %f1873;
	lg2.approx.ftz.f32 	%f1876, %f1875;
	mov.f32 	%f1877, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1878, %f1876, %f1877;
	ex2.approx.ftz.f32 	%f1879, %f1878;
	neg.ftz.f32 	%f1880, %f1879;
	bra.uni 	$LDWendi___log2f_305_30;
$Lt_128_317442:
	.loc	5	244	0
	lg2.approx.ftz.f32 	%f1881, %f1873;
	mov.f32 	%f1882, 0f3ee8ba2e;  	// 0.454545
	mul.ftz.f32 	%f1883, %f1881, %f1882;
	ex2.approx.ftz.f32 	%f1880, %f1883;
$LDWendi___log2f_305_30:
	.loc	6	240	0
	mov.f32 	%f139, %f1880;
$Lt_128_315906:
	.loc	6	243	0
	mov.f32 	%f140, %f1846;
$Lt_128_315394:
$Lt_128_270338:
	@!%p8 bra 	$Lt_128_318210;
	.loc	21	126	0
	mul.lo.u64 	%rd17, %rd11, 8;
	add.u64 	%rd18, %rd12, %rd17;
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f137;
	mov.b32		%r110, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f138;
	mov.b32		%r111, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f139;
	mov.b32		%r112, %b1; }
	{ .reg .b32 %b1;
	cvt.rn.ftz.f16.f32	%b1, %f140;
	mov.b32		%r113, %b1; }
	st.global.v4.u16 	[%rd18+0], {%r110,%r111,%r112,%r113};
	.loc	6	246	0
	bra.uni 	$Lt_128_317954;
$Lt_128_318210:
	.loc	21	126	0
	mul.lo.u64 	%rd19, %rd11, 16;
	add.u64 	%rd20, %rd12, %rd19;
	st.global.v4.f32 	[%rd20+0], {%f137,%f138,%f139,%f140};
$Lt_128_317954:
$Lt_128_269826:
	.loc	6	714	0
	exit;
$LDWend_HorizontalOnlyScaleKernel:
	} // HorizontalOnlyScaleKernel
	// ---------------------------------------------------------------------------------------
	// Constant-space tables, stored as raw byte initializers (.b8, 4-byte aligned).
	// The values in the comments below are obtained by grouping the bytes four at a time and
	// decoding each group as a little-endian IEEE-754 binary32 number; they are rounded (≈).
	// NOTE(review): the f32 reading and the 3x3 row-major grouping are assumptions. No load
	// from these particular tables is visible in this part of the file (the kernel above
	// reads the similarly named kRGB32f_To_601YPbPr with ld.const.f32 at offsets 0/4/8).
	// Confirm against the kernels that actually index these tables.
	// ---------------------------------------------------------------------------------------

	// k601YPbPr_To_RGB32f (36 bytes, 9 words):
	//   [ 1.0,  0.0,       1.402    ]
	//   [ 1.0, -0.344136, -0.714136 ]
	//   [ 1.0,  1.772,     0.0      ]
	.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,188,116,179,63,0,0,128,63,152,50,176,190,158,209,54,191,0,0,128,63,229,208,226,63,0,0,0,0};
	// kRGB32f_To_601YCbCr (36 bytes, 9 words):
	//   [  65.481,  128.553,   24.966 ]
	//   [ -37.797,  -74.203,  112.0   ]
	//   [ 112.0,    -93.786,  -18.214 ]
	.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70,246,130,66,145,141,0,67,94,186,199,65,33,48,23,194,240,103,148,194,0,0,224,66,0,0,224,66,111,146,187,194,70,182,145,193};
	// k601YCbCr_To_RGB32f (36 bytes, 9 words):
	//   [ 0.0045662,  0.0,        0.0062589 ]
	//   [ 0.0045662, -0.0015340, -0.0031881 ]
	//   [ 0.0045662,  0.0079107,  0.0       ]
	// NOTE(review): every other non-zero entry equals the matching k601YCbCr_To_RGB8u entry
	// divided by 255, but entry [1][1] does not (0.391762 / 255 ≈ 0.0015363 vs. 0.0015340
	// stored here, about 0.15% apart). Confirm whether that difference is intended.
	.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,182,23,205,59,37,160,149,59,40,15,201,186,156,239,80,187,37,160,149,59,236,155,1,60,0,0,0,0};
	// kRGB8u_To_601YCbCr (36 bytes, 9 words):
	//   [  0.256789,  0.504129,  0.097906 ]
	//   [ -0.148223, -0.290992,  0.439215 ]
	//   [  0.439215, -0.367789, -0.071426 ]
	.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219,121,131,62,152,14,1,63,18,131,200,61,174,199,23,190,238,252,148,190,197,224,224,62,197,224,224,62,217,78,188,190,174,71,146,189};
	// k601YCbCr_To_RGB8u (36 bytes, 9 words):
	//   [ 1.164383,  0.0,       1.596027 ]
	//   [ 1.164383, -0.391762, -0.812968 ]
	//   [ 1.164383,  2.017231,  0.0      ]
	.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,160,74,204,63,127,10,149,63,254,148,200,190,184,30,80,191,127,10,149,63,78,26,1,64,0,0,0,0};
	// kRGB8u_To_601YCbCrFullRange (36 bytes, 9 words):
	//   [  0.299,     0.587,     0.114    ]
	//   [ -0.168074, -0.329965,  0.498039 ]
	//   [  0.498039, -0.417047, -0.080992 ]
	.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135,22,153,62,162,69,22,63,213,120,233,61,166,27,44,190,39,241,168,190,250,254,254,62,250,254,254,62,43,135,213,190,59,223,165,189};
	// k601YCbCrFullRange_To_RGB8u (36 bytes, 9 words):
	//   [ 1.0,  0.0,       1.396524 ]
	//   [ 1.0, -0.342793, -0.711348 ]
	//   [ 1.0,  1.765078,  0.0      ]
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0,0,128,63,0,0,0,0,72,193,178,63,0,0,128,63,143,130,175,190,225,26,54,191,0,0,128,63,20,238,225,63,0,0,0,0};
	// kRGB32f_To_601YCbCrFullRange (36 bytes, 9 words):
	//   [  76.245,  149.685,   29.070 ]
	//   [ -42.859,  -84.141,  127.0   ]
	//   [ 127.0,   -106.347,  -20.653 ]
	.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113,125,152,66,92,175,21,67,92,143,232,65,158,111,43,194,49,72,168,194,0,0,254,66,0,0,254,66,170,177,212,194,88,57,165,193};
	// k601YCbCrFullRange_To_RGB32f (36 bytes, 9 words):
	//   [ 0.0039216,  0.0,        0.0054766 ]
	//   [ 0.0039216, -0.0013443, -0.0027896 ]
	//   [ 0.0039216,  0.0069219,  0.0       ]
	.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129,128,128,59,0,0,0,0,188,116,179,59,129,128,128,59,194,50,176,186,179,209,54,187,129,128,128,59,229,208,226,59,0,0,0,0};
	// k709YPbPr_To_RGB32f (36 bytes, 9 words):
	//   [ 1.0,  0.0,       1.5748   ]
	//   [ 1.0, -0.187324, -0.468124 ]
	//   [ 1.0,  1.8556,    0.0      ]
	.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0,0,128,63,0,0,0,0,12,147,201,63,0,0,128,63,221,209,63,190,243,173,239,190,0,0,128,63,77,132,237,63,0,0,0,0};
	// kRGB32f_To_709YCbCr (36 bytes, 9 words):
	//   [  46.559,  156.629,   15.812 ]
	//   [ -25.664,  -86.336,  112.0   ]
	//   [ 112.0,   -101.730,  -10.270 ]
	.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106,60,58,66,6,161,28,67,244,253,124,65,223,79,205,193,8,172,172,194,0,0,224,66,0,0,224,66,195,117,203,194,236,81,36,193};
	// k709YCbCr_To_RGB32f (36 bytes, 9 words):
	//   [ 0.0045662,  0.0,         0.0070304 ]
	//   [ 0.0045662, -0.00083627, -0.0020898 ]
	//   [ 0.0045662,  0.0082839,   0.0       ]
	.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37,160,149,59,0,0,0,0,239,94,230,59,37,160,149,59,33,57,91,186,178,245,8,187,37,160,149,59,82,185,7,60,0,0,0,0};
	// kRGB8u_To_709YCbCr (36 bytes, 9 words). Decoding each 4-byte group as a little-endian
	// IEEE-754 binary32 value gives (rounded, presumably a row-major 3x3 matrix):
	//   [  0.182586,  0.614231,  0.062008 ]
	//   [ -0.100633, -0.338570,  0.439215 ]
	//   [  0.439215, -0.398941, -0.040273 ]
	// FIX: the fourth word was stored as {147,24,206,61} = +0.100633, i.e. with the sign bit
	// clear. The corresponding entry is negative in the sibling tables kRGB32f_To_709YCbCr
	// (-25.664) and kRGB8u_To_601YCbCr (-0.148223), and with the positive sign the second row
	// summed to ≈ +0.2013 instead of ≈ 0 like the third row does. Only the top byte is
	// changed (61 -> 189) to set the sign bit; the magnitude is left exactly as it was.
	// NOTE(review): this file is compiler output, so the same sign fix presumably belongs in
	// the constant definition of the originating source — confirm and regenerate.
	.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207,247,58,62,53,62,29,63,231,251,125,61,147,24,206,189,23,89,173,190,197,224,224,62,197,224,224,62,12,66,204,190,195,245,36,189};
	// ---------------------------------------------------------------------------------------
	// Constant-space tables, stored as raw byte initializers (.b8, 4-byte aligned).
	// The values in the comments below are obtained by grouping the bytes four at a time and
	// decoding each group as a little-endian IEEE-754 binary32 number; they are rounded (≈).
	// NOTE(review): the f32 reading and the row-major grouping are assumptions. No load from
	// these particular tables is visible in this part of the file; confirm against the
	// kernels that actually index them.
	// ---------------------------------------------------------------------------------------

	// k709YCbCr_To_RGB8u (36 bytes, 9 words):
	//   [ 1.164383,  0.0,       1.792742 ]
	//   [ 1.164383, -0.213250, -0.532910 ]
	//   [ 1.164383,  2.112402,  0.0      ]
	.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127,10,149,63,0,0,0,0,147,120,229,63,127,10,149,63,53,94,90,190,205,108,8,191,127,10,149,63,154,49,7,64,0,0,0,0};
	// k709YCbCr_To_601YCbCr (36 bytes, 9 words):
	//   [ 1.0,  0.099312,  0.191700 ]
	//   [ 0.0,  0.989854, -0.110653 ]
	//   [ 0.0, -0.072453,  0.983398 ]
	.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0,0,128,63,23,100,203,61,1,77,68,62,0,0,0,0,18,103,125,63,10,158,226,189,0,0,0,0,61,98,148,189,249,191,123,63};
	// k601YCbCr_To_709YCbCr (36 bytes, 9 words):
	//   [ 1.0, -0.115550, -0.207938 ]
	//   [ 0.0,  1.018640,  0.114618 ]
	//   [ 0.0,  0.075049,  1.025327 ]
	.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0,0,128,63,122,165,236,189,179,237,84,190,0,0,0,0,204,98,130,63,216,188,234,61,0,0,0,0,74,179,153,61,234,61,131,63};
	// kYCbCrOffset (12 bytes, 3 words): { 16.0, 128.0, 128.0 }
	.const .align 4 .b8 kYCbCrOffset[12] = {0,0,128,65,0,0,0,67,0,0,0,67};
	// kYCbCrFullRangeOffset (12 bytes, 3 words): { 0.0, 128.0, 128.0 }
	.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0,0,0,0,0,0,0,67,0,0,0,67};
	// kRGB32f_To_YIQ (36 bytes, 9 words):
	//   [ 0.299,     0.587,     0.114    ]
	//   [ 0.595716, -0.274453, -0.321263 ]
	//   [ 0.211456, -0.522591,  0.311135 ]
	.const .align 4 .b8 kRGB32f_To_YIQ[36] = {135,22,153,62,162,69,22,63,213,120,233,61,216,128,24,63,27,133,140,190,149,124,164,190,236,135,88,62,134,200,5,191,22,77,159,62};
	// kYIQ_To_RGB32f (36 bytes, 9 words):
	//   [ 1.0,  0.9563,  0.6210 ]
	//   [ 1.0, -0.2721, -0.6474 ]
	//   [ 1.0, -1.1070,  1.7046 ]
	.const .align 4 .b8 kYIQ_To_RGB32f[36] = {0,0,128,63,20,208,116,63,219,249,30,63,0,0,128,63,177,80,139,190,2,188,37,191,0,0,128,63,45,178,141,191,85,48,218,63};

