Skip to content

Latest commit

 

History

History
354 lines (267 loc) · 9.66 KB

161.md

File metadata and controls

354 lines (267 loc) · 9.66 KB
layout title
post
第161期

C++ 中文周刊 2024-06-23 第161期

周刊项目地址

公众号

点击「查看原文」跳转到 GitHub 上对应文件,链接就可以点击了

qq群 点击进入

RSS

欢迎投稿,推荐或自荐文章/软件/资源等,评论区留言

本期文章由 HNY lhmouse 赞助

lhmouse的项目 mcfthread/asteria 推荐大家关注


资讯

标准委员会动态/ide/编译器信息放在这里

编译器信息最新动态推荐关注hellogcc公众号 本周更新 第268期

boost 全新网站 高大上

文章

可以参考

看一乐

看一乐

clang -ffast-math 有 bug

// Returns the element-wise square root of the four packed floats in `vec`,
// computed with the `_mm_sqrt_ps` intrinsic.
__m128 test(const __m128 vec)
{
    return _mm_sqrt_ps(vec);
}
/*
test:                                   # @test
        sqrtps  xmm0, xmm0
        ret
*/

正常生成汇编没问题,但开启 -ffast-math 后生成的汇编有问题:sqrtps 会被替换成 rsqrtps,又因为 rsqrtps 精度不够,还要补一轮牛顿迭代,反而慢了我靠

.LCPI0_0:
        .long   0xbf000000                      # float -0.5
        .long   0xbf000000                      # float -0.5
        .long   0xbf000000                      # float -0.5
        .long   0xbf000000                      # float -0.5
.LCPI0_1:
        .long   0xc0400000                      # float -3
        .long   0xc0400000                      # float -3
        .long   0xc0400000                      # float -3
        .long   0xc0400000                      # float -3
test:
        rsqrtps xmm1, xmm0
        movaps  xmm2, xmm0
        mulps   xmm2, xmm1
        movaps  xmm3, xmmword ptr [rip + .LCPI0_0] # xmm3 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
        mulps   xmm3, xmm2
        mulps   xmm2, xmm1
        addps   xmm2, xmmword ptr [rip + .LCPI0_1]
        mulps   xmm2, xmm3
        xorps   xmm1, xmm1
        cmpneqps        xmm0, xmm1
        andps   xmm0, xmm2
        ret

为了规避只好手写汇编

// Takes the packed square root through inline assembly instead of the intrinsic,
// so the `sqrtps` instruction is spelled out explicitly in the source.
__m128 test(__m128 vec)
{
    // "x" puts `vec` in an SSE register as input (%1); the same variable is the "=x" output (%0).
    __asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
    return vec;
}

但是手写汇编优化不彻底,该有的常量折叠没做

// Inline-asm packed square root, marked `always_inline` so it is inlined into callers.
// NOTE(review): per the listing that accompanies this snippet, a call with constant
// arguments still executes `sqrtps` at run time; the asm statement is not constant-folded.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
    // "x" puts `vec` in an SSE register as input (%1); the same variable is the "=x" output (%0).
    __asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
    return vec;
}

// Calls `test` on the constant vector (1, 2, 3, 4) built with `_mm_setr_ps`,
// to check whether the square root gets folded at compile time.
__m128 call_test()
{
    return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}
test:
        sqrtps  xmm0, xmm0
        ret
.LCPI1_0:
        .long   0x3f800000                      # float 1
        .long   0x40000000                      # float 2
        .long   0x40400000                      # float 3
        .long   0x40800000                      # float 4
call_test:
        movaps  xmm0, xmmword ptr [rip + .LCPI1_0] # xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
        sqrtps  xmm0, xmm0
        ret

sqrtps并没有优化掉

手动折叠, __builtin_constant_p

// Packed square root intended to stay constant-foldable: if the compiler reports `vec`
// as a compile-time constant, use the intrinsic; otherwise fall back to inline asm.
// NOTE(review): per the listing that accompanies this snippet, testing the whole
// `__m128` with `__builtin_constant_p` does not select the intrinsic branch for a
// constant argument; the emitted code still runs `sqrtps`.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
    if (__builtin_constant_p(vec))
    {
        return _mm_sqrt_ps(vec);
    }

    // Non-constant input: emit `sqrtps` directly ("x" = SSE register operand).
    __asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
    return vec;
}

// Calls `test` on the constant vector (1, 2, 3, 4) built with `_mm_setr_ps`,
// to check whether the square root gets folded at compile time.
__m128 call_test()
{
    return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}

汇编

call_test:
        movaps  xmm0, xmmword ptr [rip + .LCPI11_0] # xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
        sqrtps  xmm0, xmm0
        ret

并没有优化掉??又有个bug,__builtin_constant_p 不能直接用于整个向量(__m128),得逐元素手动展开

// Packed square root that stays constant-foldable: `__builtin_constant_p` is checked
// on each of the four lanes individually, and only when all are compile-time constants
// is the intrinsic used; otherwise the inline-asm path runs.
__attribute__((always_inline)) __m128 test(__m128 vec)
{
    // Per-lane checks are used here instead of a single check on the whole vector.
    if (__builtin_constant_p(vec[0]) &&
      __builtin_constant_p(vec[1]) &&
      __builtin_constant_p(vec[2]) &&
      __builtin_constant_p(vec[3]))
    {
        return _mm_sqrt_ps(vec);
    }

    // Non-constant input: emit `sqrtps` directly ("x" = SSE register operand).
    __asm__ ("sqrtps %1, %0" : "=x"(vec) : "x"(vec));
    return vec;
}

// Calls `test` on the constant vector (1, 2, 3, 4) built with `_mm_setr_ps`,
// to check whether the square root gets folded at compile time.
__m128 call_test()
{
    return test(_mm_setr_ps(1.f, 2.f, 3.f, 4.f));
}
.LCPI15_0:
        .long   0x3f800000                      # float 1
        .long   0x3fb504f3                      # float 1.41421354
        .long   0x3fddb3d7                      # float 1.73205078
        .long   0x40000000                      # float 2
call_test:
        movaps  xmm0, xmmword ptr [rip + .LCPI15_0] # xmm0 = [1.0E+0,1.41421354E+0,1.73205078E+0,2.0E+0]
        ret

折叠了

godbolt

c++20之前,文件的时间不够直观,得转一手

// Before C++20 there is no standard conversion from the filesystem clock to
// system_clock, and file_time_type is not a system_clock::time_point on every
// implementation. Rebase the timestamp by measuring its offset from "now" on the
// file clock and applying that offset to "now" on system_clock.
const auto filetime = fs::last_write_time(myPath);
const auto systime = std::chrono::time_point_cast<std::chrono::system_clock::duration>(
    filetime - fs::file_time_type::clock::now() + std::chrono::system_clock::now());
const std::time_t convfiletime = std::chrono::system_clock::to_time_t(systime);
std::cout << "Updated: " << std::ctime(&convfiletime) << '\n';

c++20可以直接用了

// C++20: the file time returned by last_write_time can be formatted and streamed directly.
auto temp = std::filesystem::temp_directory_path() / "example.txt";
// Write the file so that it has a last-write time to query.
std::ofstream(temp.c_str()) << "Hello, World!";
auto ftime = std::filesystem::last_write_time(temp);
// Both replacement fields use argument 0: %R prints HH:MM, %F prints YYYY-MM-DD.
std::cout << std::format("File write time is {0:%R} on {0:%F}\n", ftime);
std::cout << ftime;
// Sample output:
// File write time is 14:52 on 2024-06-22
// 2024-06-22 14:52:46.632061324

godbolt

这个之前讲过,就是这个例子

#include <iostream>
#include <string_view>
#include <cstring>

// Demo type for temporary lifetime extension: `sv` views the object's own `data`
// buffer, and the destructor announces itself so the destruction point is visible.
struct Example {
    char data[6] = "hello";
    // View over the five characters of `data`; the length is fixed at construction.
    std::string_view sv = data;
    ~Example() { 
        // Overwrites the first four bytes with "bye\0"; `sv` still spans five bytes,
        // so the print below shows "bye", an embedded NUL, then the leftover 'o'.
        strcpy(data, "bye");
        std::cout <<"~Example: "<< sv << '\n'; 
    }
};

// Binds a reference to a member of a temporary Example and prints through it.
int main() {
    auto&& local = Example().sv;  // Here we extend lifetime of entire Example
    std::cout << local << '\n'; // prints "hello"
}

Example并不会立即析构,生命周期被local延长了,local打印hello

godbolt

这个例子来自这里

列举了内核里使用 __counted_by 的例子,比如

-    cmd.cmd.scan_type = WMI_ACTIVE_SCAN;
-    cmd.cmd.num_channels = 0;
+    cmd->scan_type = WMI_ACTIVE_SCAN;
+    cmd->num_channels = 0;
     n = min(request->n_channels, 4U);
     for (i = 0; i < n; i++) {
         int ch = request->channels[i]->hw_value;
@@ -991,7 +988,8 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
             continue;
         }
         /* 0-based channel indexes */
-        cmd.cmd.channel_list[cmd.cmd.num_channels++].channel = ch - 1;
+        cmd->num_channels++;
+        cmd->channel_list[cmd->num_channels - 1].channel = ch - 1;
         wil_dbg_misc(wil, "Scan for ch %d  : %d MHz\n", ch,
                  request->channels[i]->center_freq);
     }
 ...
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -474,7 +474,7 @@ struct wmi_start_scan_cmd {
     struct {
         u8 channel;
         u8 reserved;
-    } channel_list[];
+    } channel_list[] __counted_by(num_channels);
 } __packed;

没啥说的。就是针对柔性数组成员(flexible array member)做的一个patch,用 __counted_by 标注元素个数,帮助编译器做边界分析,类似 guarded_by

就是异或链表,代码不贴了,谁这么写离我远点

字符串复制要注意 \0 截断问题

看不懂

视频

介绍float特殊类型

#include <bit>
#include <concepts>
#include <cstdint>
#include <format>
#include <iomanip>
#include <iostream>
#include <stdfloat>

/// Prints one 16-bit pattern interpreted both as std::float16_t and as std::bfloat16_t.
///
/// The output line contains, in order: the raw bits in binary, hex and decimal; the
/// float16 value followed by its sign/exponent/mantissa fields (1/5/10 bits); and the
/// bfloat16 value followed by its sign/exponent/mantissa fields (1/8/7 bits).
///
/// @param val raw bit pattern to reinterpret.
void explore_float16(std::uint16_t val) {
  const auto f16 = std::bit_cast<std::float16_t>(val);
  const auto bf16 = std::bit_cast<std::bfloat16_t>(val);
  // With '#', the "0b"/"0x" prefix counts toward the field width, so printing all
  // 16 bits needs width 18 and printing all 4 hex digits needs width 6.
  std::cout << std::format(
      "{:#018b} {:#06x} {} {}f16 (0b{:01b}'{:05b}'{:010b}) {}bf16 "
      "(0b{:01b}'{:08b}'{:07b})\n",
      val, val, val, f16, (val >> 15) & 0b1, (val >> 10) & 0b11111,
      (val & 0b1111111111), bf16, (val >> 15) & 0b1, (val >> 7) & 0b11111111,
      (val & 0b1111111));
}

// Runs explore_float16 on three sample bit patterns. The digit separators group each
// literal as 1 sign bit ' 5 bits ' 10 bits, matching the float16 fields it prints.
int main() {
  explore_float16(0b0'01101'0101010101);
  explore_float16(0b0'11110'1111111111);
  explore_float16(0b1'11100'0011111111);
}

godbolt

开源项目介绍

互动环节

最近的热点消息那必然是各个群里都传的过劳事件了,大家保重身体不要太拼了,起码睡够,不睡够干不了活的,起码闭眼够


上一期