| @@ -90,7 +90,8 @@ static float Fp16ToFloat(const uint16_t &fp_val) { | |||
| hf_exp--; | |||
| } | |||
| uint32_t e_ret, m_ret; | |||
| uint32_t e_ret; | |||
| uint32_t m_ret; | |||
| uint32_t s_ret = hf_sign; | |||
| if (hf_man == 0) { | |||
| e_ret = 0; | |||
| @@ -573,13 +574,18 @@ static uint16_t Fp16Sub(uint16_t v_1, uint16_t v_2) { | |||
| /// @brief Performing fp16_t multiplication | |||
| /// @return Return fp16_t result of multiplying this and fp | |||
| static uint16_t Fp16Mul(uint16_t v_1, uint16_t v_2) { | |||
| uint16_t s_a, s_b; | |||
| int16_t e_a, e_b; | |||
| uint32_t m_a, m_b; | |||
| uint16_t s_ret, m_ret; | |||
| uint16_t s_a; | |||
| uint16_t s_b; | |||
| int16_t e_a; | |||
| int16_t e_b; | |||
| uint32_t m_a; | |||
| uint32_t m_b; | |||
| uint16_t s_ret; | |||
| uint16_t m_ret; | |||
| int16_t e_ret; | |||
| uint32_t mul_m; | |||
| uint16_t m_a_tmp, m_b_tmp; | |||
| uint16_t m_a_tmp; | |||
| uint16_t m_b_tmp; | |||
| // 1.Extract | |||
| ExtractFp16(v_1, s_a, e_a, m_a_tmp); | |||
| ExtractFp16(v_2, s_b, e_b, m_b_tmp); | |||
| @@ -644,11 +650,15 @@ static uint16_t Fp16Div(uint16_t v_1, uint16_t v_2) { | |||
| } else if (FP16_IS_ZERO(v_1)) { | |||
| ret = 0u; | |||
| } else { | |||
| uint16_t s_a, s_b; | |||
| int16_t e_a, e_b; | |||
| uint64_t m_a, m_b; | |||
| uint16_t s_a; | |||
| uint16_t s_b; | |||
| int16_t e_a; | |||
| int16_t e_b; | |||
| uint64_t m_a; | |||
| uint64_t m_b; | |||
| float m_div; | |||
| uint16_t m_a_tmp, m_b_tmp; | |||
| uint16_t m_a_tmp; | |||
| uint16_t m_b_tmp; | |||
| // 1.Extract | |||
| ExtractFp16(v_1, s_a, e_a, m_a_tmp); | |||
| ExtractFp16(v_2, s_b, e_b, m_b_tmp); | |||
| @@ -742,9 +752,12 @@ bool fp16_t::operator!=(const fp16_t &fp) const { | |||
| return result; | |||
| } | |||
| bool fp16_t::operator>(const fp16_t &fp) const { | |||
| uint16_t s_a, s_b; | |||
| uint16_t e_a, e_b; | |||
| uint16_t m_a, m_b; | |||
| uint16_t s_a; | |||
| uint16_t s_b; | |||
| uint16_t e_a; | |||
| uint16_t e_b; | |||
| uint16_t m_a; | |||
| uint16_t m_b; | |||
| bool result = true; | |||
| // 1.Extract | |||
| @@ -823,9 +836,11 @@ fp16_t &fp16_t::operator=(const fp16_t &fp) { | |||
| return *this; | |||
| } | |||
| fp16_t &fp16_t::operator=(const float &f_val) { | |||
| uint16_t s_ret, m_ret; | |||
| uint16_t s_ret; | |||
| uint16_t m_ret; | |||
| int16_t e_ret; | |||
| uint32_t e_f, m_f; | |||
| uint32_t e_f; | |||
| uint32_t m_f; | |||
| const uint32_t ui32_v = *(reinterpret_cast<const uint32_t *>(&f_val)); // 1:8:23bit sign:exp:man | |||
| uint32_t m_len_delta; | |||
| @@ -1180,40 +1195,20 @@ fp16_t &fp16_t::operator=(const double &d_val) { | |||
| } | |||
| // convert | |||
| fp16_t::operator float() const { | |||
| return Fp16ToFloat(val); | |||
| } | |||
| fp16_t::operator double() const { | |||
| return Fp16ToDouble(val); | |||
| } | |||
| fp16_t::operator int8_t() const { | |||
| return Fp16ToInt8(val); | |||
| } | |||
| fp16_t::operator uint8_t() const { | |||
| return Fp16ToUInt8(val); | |||
| } | |||
| fp16_t::operator int16_t() const { | |||
| return Fp16ToInt16(val); | |||
| } | |||
| fp16_t::operator uint16_t() const { | |||
| return Fp16ToUInt16(val); | |||
| } | |||
| fp16_t::operator int32_t() const { | |||
| return Fp16ToInt32(val); | |||
| } | |||
| fp16_t::operator uint32_t() const { | |||
| return Fp16ToUInt32(val); | |||
| } | |||
| fp16_t::operator float() const { return Fp16ToFloat(val); } | |||
| fp16_t::operator double() const { return Fp16ToDouble(val); } | |||
| fp16_t::operator int8_t() const { return Fp16ToInt8(val); } | |||
| fp16_t::operator uint8_t() const { return Fp16ToUInt8(val); } | |||
| fp16_t::operator int16_t() const { return Fp16ToInt16(val); } | |||
| fp16_t::operator uint16_t() const { return Fp16ToUInt16(val); } | |||
| fp16_t::operator int32_t() const { return Fp16ToInt32(val); } | |||
| fp16_t::operator uint32_t() const { return Fp16ToUInt32(val); } | |||
| // Cannot be used, just in order to solve the compile error | |||
| fp16_t::operator int64_t() const { | |||
| return 0; | |||
| } | |||
| fp16_t::operator int64_t() const { return 0; } | |||
| // Cannot be used, just in order to solve the compile error | |||
| fp16_t::operator uint64_t() const { | |||
| return 0; | |||
| } | |||
| fp16_t::operator uint64_t() const { return 0; } | |||
| int fp16_t::IsInf() { | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int fp16_t::IsInf() { | |||
| if ((val & kFp16AbsMax) == kFp16ExpMask) { | |||
| if (val & kFp16SignMask) { | |||
| return -1; | |||
| @@ -1225,28 +1220,12 @@ int fp16_t::IsInf() { | |||
| } | |||
| } | |||
| float fp16_t::ToFloat() const { | |||
| return Fp16ToFloat(val); | |||
| } | |||
| double fp16_t::ToDouble() const { | |||
| return Fp16ToDouble(val); | |||
| } | |||
| int8_t fp16_t::ToInt8() const { | |||
| return Fp16ToInt8(val); | |||
| } | |||
| uint8_t fp16_t::ToUInt8() const { | |||
| return Fp16ToUInt8(val); | |||
| } | |||
| int16_t fp16_t::ToInt16() const { | |||
| return Fp16ToInt16(val); | |||
| } | |||
| uint16_t fp16_t::ToUInt16() const { | |||
| return Fp16ToUInt16(val); | |||
| } | |||
| int32_t fp16_t::ToInt32() const { | |||
| return Fp16ToInt32(val); | |||
| } | |||
| uint32_t fp16_t::ToUInt32() const { | |||
| return Fp16ToUInt32(val); | |||
| } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY float fp16_t::ToFloat() const { return Fp16ToFloat(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY double fp16_t::ToDouble() const { return Fp16ToDouble(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int8_t fp16_t::ToInt8() const { return Fp16ToInt8(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint8_t fp16_t::ToUInt8() const { return Fp16ToUInt8(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int16_t fp16_t::ToInt16() const { return Fp16ToInt16(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint16_t fp16_t::ToUInt16() const { return Fp16ToUInt16(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int32_t fp16_t::ToInt32() const { return Fp16ToInt32(val); } | |||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint32_t fp16_t::ToUInt32() const { return Fp16ToUInt32(val); } | |||
| } // namespace ge | |||