awk函数详解与使用技巧-编程阁

字符串函数

以下是 awk 中常用的字符串函数详解，包含语法和示例：

1.length() - 字符串长度

# 语法 length([string]) # 示例 echo "hello" | awk '{print length()}' # 输出 5 echo "hello" | awk '{print length($0)}' # 输出 5 echo "" | awk '{print length("test")}' # 输出 4

2.index() - 查找子串位置

# 语法 index(string, substring) # 返回位置（从1开始），未找到返回0 # 示例 echo "hello world" | awk '{print index($0, "world")}' # 输出 7 echo "hello" | awk '{print index($0, "x")}' # 输出 0

3.substr() - 提取子串

# 语法 substr(string, start [, length]) # 示例 echo "hello world" | awk '{print substr($0, 7, 5)}' # 输出 "world" echo "hello" | awk '{print substr($0, 2, 3)}' # 输出 "ell" echo "hello" | awk '{print substr($0, 3)}' # 输出 "llo"

4.split() - 分割字符串

# 语法 split(string, array [, fieldsep]) # 示例 echo "a,b,c,d" | awk '{ n = split($0, arr, ",") for(i=1; i<=n; i++) print arr[i] }' # 输出： # a # b # c # d

5.match() - 正则匹配

# 语法 match(string, regexp) # 返回匹配位置，设置RSTART和RLENGTH # 示例 echo "hello 123 world" | awk '{ if (match($0, /[0-9]+/)) { print "位置:", RSTART, "长度:", RLENGTH print "匹配内容:", substr($0, RSTART, RLENGTH) } }' # 输出： # 位置: 7 长度: 3 # 匹配内容: 123

6.sub() - 替换第一个匹配

# 语法 sub(regexp, replacement [, target]) # 示例 echo "foo bar foo" | awk '{sub(/foo/, "FOO"); print}' # 输出：FOO bar foo echo "test" | awk '{sub(/t/, "T", $0); print}' # 明确指定target # 输出：Test

7.gsub() - 全局替换

# 语法 gsub(regexp, replacement [, target]) # 示例 echo "foo bar foo" | awk '{gsub(/foo/, "FOO"); print}' # 输出：FOO bar FOO # 统计替换次数 echo "a b a c a" | awk '{print gsub(/a/, "A") " 次替换"}' # 输出：3 次替换

8.toupper() / tolower() - 大小写转换

# 语法 toupper(string) tolower(string) # 示例 echo "Hello World" | awk '{print toupper($0)}' # 输出 HELLO WORLD echo "HELLO" | awk '{print tolower($0)}' # 输出 hello

9.sprintf() - 格式化字符串

# 语法 sprintf(format, expression1, ...) # 示例 awk 'BEGIN { str = sprintf("姓名: %s, 年龄: %d, 分数: %.2f", "张三", 20, 95.5) print str }' # 输出：姓名: 张三, 年龄: 20, 分数: 95.50

10.gensub() - 通用替换（GNU awk特有）

# 语法 gensub(regexp, replacement, how [, target]) # 示例 echo "a1 b2 c3" | awk '{print gensub(/([a-z])([0-9])/, "\\2\\1", "g")}' # 输出：1a 2b 3c # how参数：数字（第几个匹配）或 "g"（全局） echo "a b a c a" | awk '{print gensub(/a/, "A", 2)}' # 输出：a b A c a

11.strtonum() - 字符串转数字（GNU awk）

# 语法 strtonum(string) # 自动识别八进制、十六进制 # 示例 awk 'BEGIN { print strtonum("123") # 123 print strtonum("0xFF") # 255 print strtonum("0777") # 511（八进制） }'

12.asort() / asorti() - 数组排序（GNU awk）

# 语法 asort(source [, dest [, how]]) # 按值排序 asorti(source [, dest [, how]]) # 按键排序 # 示例 awk 'BEGIN { arr["z"] = 3 arr["a"] = 1 arr["m"] = 2 n = asort(arr, sorted) for(i=1; i<=n; i++) print sorted[i] # 输出：1 2 3 m = asorti(arr, keys) for(i=1; i<=m; i++) print keys[i] # 输出：a m z }'

13.patsplit() - 模式分割（GNU awk）

# 语法 patsplit(string, array [, fieldpat]) # 示例 echo "abc123def456" | awk '{ n = patsplit($0, arr, /[a-z]+/) for(i=1; i<=n; i++) print arr[i] }' # 输出： # abc # def

综合示例

# Parse one log line and print it as "[timestamp] LEVEL: message".
# Works in any POSIX awk: the level is taken via RSTART/RLENGTH rather than
# the gawk-only three-argument match().
echo '2024-01-15 10:30:45 ERROR: Connection failed' | awk '{
    # The timestamp occupies the first 19 characters
    time_part = substr($0, 1, 19)

    # Log level; reset first so a line without one does not reuse the
    # value left over from the previous record
    level = ""
    if (match($0, /ERROR|WARN|INFO/))
        level = substr($0, RSTART, RLENGTH)

    # Message text is everything after the first ": "
    message = ""
    msg_start = index($0, ": ")
    if (msg_start > 0)
        message = substr($0, msg_start + 2)

    # Add the ellipsis only when the message really is truncated
    if (length(message) > 20)
        message = substr(message, 1, 20) "..."

    printf("[%s] %-5s: %s\n", time_part, toupper(level), message)
}'

使用技巧

链式调用

echo " Hello World " | awk '{ print toupper(substr($0, 3, 5)) }'

处理多行

# Join all lines of file.txt into a single space-separated line.
# With the default RS a record never contains "\n", so no gsub() is needed:
# setting ORS to a space is what joins the records. printf in END writes the
# final newline without appending ORS a second time.
awk 'BEGIN { ORS = " " } { print } END { printf "\n" }' file.txt

反向引用

echo "abc123" | awk '{print gensub(/([a-z]+)([0-9]+)/, "\\2-\\1", "g")}' # 输出：123-abc

非字符串函数

1.数值处理函数

1.1`int()`- 取整函数

# 语法 int(x) # 返回x的整数部分（向0取整） # 示例 awk 'BEGIN { print int(3.14) # 3 print int(-3.14) # -3 print int(7.99) # 7 }'

1.2`sqrt()`- 平方根函数

# 语法 sqrt(x) # 返回x的平方根 # 示例 awk 'BEGIN { print sqrt(4) # 2 print sqrt(2) # 1.41421 print sqrt(0) # 0 print sqrt(-1) # 产生错误（NaN） }'

1.3`exp()`- 指数函数

# 语法 exp(x) # 返回e的x次幂 # 示例 awk 'BEGIN { print exp(0) # 1 print exp(1) # 2.71828 print exp(2) # 7.38906 printf "%.4f\n", exp(1) # 2.7183 }'

1.4`log()`- 自然对数

# 语法 log(x) # 返回x的自然对数（以e为底） # 示例 awk 'BEGIN { print log(1) # 0 print log(2.71828) # 1（近似） print log(10) # 2.30259 }'

1.5`sin()`,`cos()`,`atan2()`- 三角函数

# 语法 sin(x) # 正弦（x为弧度） cos(x) # 余弦（x为弧度） atan2(y, x) # 反正切（返回弧度） # 示例 awk 'BEGIN { pi = 3.14159265 print sin(pi/2) # 1（近似） print cos(0) # 1 print atan2(1, 1)*180/pi # 45度 }'

1.6`rand()`- 随机数生成

# 语法 rand() # 返回0到1之间的随机数（不包括1） # 示例 awk 'BEGIN { srand() # 用当前时间初始化随机数种子 for(i=1; i<=5; i++) { print rand() } }'

1.7`srand()`- 设置随机种子

# 语法 srand([expr]) # 设置随机数种子，返回先前的种子 # 示例 awk 'BEGIN { # 设置固定种子，使随机数可重复 srand(1234) print rand() # 每次都相同 print rand() # 使用当前时间作为种子 srand() print rand() # 每次运行不同 }'

2.时间函数（GNU awk）

2.1`systime()`- 当前时间戳

# 语法 systime() # 返回从1970-01-01到现在的秒数 # 示例 awk 'BEGIN { print "当前时间戳:", systime() print "格式化时间:", strftime("%Y-%m-%d %H:%M:%S", systime()) }'

2.2`mktime()`- 构造时间戳

# 语法 mktime(datespec) # datespec格式：YYYY MM DD HH MM SS [DST] # 示例 awk 'BEGIN { # 构造时间：2024-01-15 14:30:00 ts = mktime("2024 01 15 14 30 00") print "时间戳:", ts print "格式化:", strftime("%c", ts) }'

2.3`strftime()`- 格式化时间

# 语法 strftime([format [, timestamp [, utc-flag]]]) # 常用格式符 # %Y - 年 %m - 月 %d - 日 # %H - 时(24) %I - 时(12) %M - 分 %S - 秒 # %A - 星期全称 %a - 星期简称 # %B - 月份全称 %b - 月份简称 # 示例 awk 'BEGIN { now = systime() print strftime("日期: %Y-%m-%d", now) print strftime("时间: %H:%M:%S", now) print strftime("完整: %A, %B %d, %Y %I:%M:%S %p", now) # 指定时间戳 print strftime("%Y-%m-%d", 1700000000) }'

3.数组函数

SUBSEP

这是一个内置变量，用作多维数组下标的分隔符，默认值是"\034"。awk 并没有真正的多维数组：arr[i, j] 的下标会被拼接成字符串 i SUBSEP j，作为一维数组的键；遍历时可以用 split(key, idx, SUBSEP) 还原出各个下标。

3.1 数组操作技巧

# 创建和遍历数组 awk 'BEGIN { # 创建数组 arr["name"] = "张三" arr["age"] = 25 arr["score"] = 95.5 # 遍历数组 for(key in arr) { print key " = " arr[key] } }' # 多维数组（使用SUBSEP分隔） awk 'BEGIN { arr["2024", "01", "15"] = 100 arr["2024", "01", "16"] = 200 for(key in arr) { split(key, idx, SUBSEP) print idx[1] "-" idx[2] "-" idx[3] ":", arr[key] } }'

3.2 删除数组元素

# 语法 delete array[index] # 删除单个元素 delete array # 删除整个数组 # 示例 awk 'BEGIN { arr[1] = "a"; arr[2] = "b"; arr[3] = "c" print "删除前长度:", length(arr) delete arr[2] print "删除元素2后:" for(i in arr) print i ":" arr[i] delete arr print "删除整个数组后长度:", length(arr) }'

4.位操作函数（GNU awk）

4.1 基本位操作

# 语法 and(v1, v2) # 按位与 or(v1, v2) # 按位或 xor(v1, v2) # 按位异或 lshift(num, count) # 左移 rshift(num, count) # 右移 compl(num) # 按位取反 # 示例 awk 'BEGIN { a = 10 # 1010 b = 6 # 0110 print "a AND b =", and(a, b) # 2 (0010) print "a OR b =", or(a, b) # 14 (1110) print "a XOR b =", xor(a, b) # 12 (1100) print "a左移2位 =", lshift(a, 2) # 40 (101000) print "a右移1位 =", rshift(a, 1) # 5 (0101) print "a取反 =", compl(a) # -11 (取决于位数) }'

5.类型判断函数

5.1 类型检测

# Classify a value as number / unassigned / string using only portable awk.
# A user-defined function must be part of the awk program text, so get_type()
# is defined inside the quotes together with the code that calls it.
# (gawk 4.2 and later also offers a built-in typeof() for this purpose.)
awk '
# Return "number" when var compares equal to its own numeric value,
# "unassigned" when it is the empty string, and "string" otherwise.
function get_type(var) {
    if (var == var + 0) return "number"
    if (var == "") return "unassigned"
    return "string"
}

BEGIN {
    x = 123
    y = "hello"
    z = ""
    print "x类型:", get_type(x)
    print "y类型:", get_type(y)
    print "z类型:", get_type(z)
}'

6.I/O相关函数

6.1`close()`- 关闭文件/管道

# Syntax
#   close(expression)    close a file or a pipe
# Example. Everything runs in BEGIN: no main input is needed, so the program
# executes once instead of waiting on stdin and repeating for every record.
awk 'BEGIN {
    # Read a file line by line, then release it
    while ((getline line < "input.txt") > 0) {
        print "读取:", line
    }
    close("input.txt")

    # Read one line from a command pipe, then close the pipe;
    # close() must receive exactly the same command string
    cmd = "date"
    cmd | getline date_str
    print "当前日期:", date_str
    close(cmd)
}'

6.2`system()`- 执行系统命令

# Syntax
#   system(command)    run a shell command and return its exit status
# Example. The awk program is wrapped in single quotes for the shell, so the
# command string must not contain single quotes itself; escaped double quotes
# are used instead.
awk 'BEGIN {
    # Run a simple command and capture its exit status
    status = system("echo \"Hello from system\"")
    print "退出状态:", status

    # Run a pipeline
    system("ls -la | wc -l")
}'

7.getline函数详解

7.1 不同形式的getline

# 1. Plain getline: read the next record of the current input into $0
#    (NR advances and the fields are re-split)
awk '{
    print NR ":", $0
    if ((getline) > 0)
        print "下一行:", $0
}' file.txt

# 2. getline var < file: read records from a named file.
#    Done in BEGIN so that other.txt is read once, not once per input record.
awk 'BEGIN {
    while ((getline line < "other.txt") > 0) {
        print "其他文件:", line
    }
    close("other.txt")
}'

# 3. command | getline var: read a line from a pipe
awk 'BEGIN {
    "date +%Y-%m-%d" | getline today
    print "今天日期:", today
}'

# 4. getline var: read the next record into a variable, leaving $0 untouched
awk '{
    if ((getline next_line) > 0)
        print "下一行:", next_line
}' file.txt

7.2 getline返回值

# getline return values:
#    1  a record was read
#    0  end of file
#   -1  error (for example, the file cannot be opened)
# The loop lives in BEGIN so it runs once without needing any main input.
awk 'BEGIN {
    while ((ret = (getline line < "data.txt")) > 0) {
        print "成功读取:", line
    }
    if (ret == 0) print "文件结束"
    if (ret < 0) print "读取错误:", ERRNO    # ERRNO is a gawk extension
}'

8.数学计算技巧

8.1 浮点数精度控制

# Control output precision with printf
awk 'BEGIN {
    pi = 3.141592653589793
    e = 2.718281828459045
    printf "pi = %.2f\n", pi               # 3.14
    printf "e = %.5f\n", e                 # 2.71828
    printf "科学计数: %.2e\n", 1234567      # 1.23e+06
}'

# Rounding helper. A user-defined function must be part of the awk program
# text, so round() is defined inside the quotes next to the code calling it.
awk '
# Round x to the given number of decimal digits; adding 0 converts the
# formatted string back into a number.
function round(x, digits) {
    return sprintf("%." digits "f", x) + 0
}

BEGIN {
    print round(3.14159, 2)    # 3.14
    print round(3.14159, 3)    # 3.142
}'

8.2 统计计算

# 计算平均值、最大值、最小值 awk '{ sum += $1 count++ if (NR == 1 || $1 < min) min = $1 if (NR == 1 || $1 > max) max = $1 } END { if (count > 0) { avg = sum / count print "数量:", count print "总和:", sum print "平均:", avg print "最小:", min print "最大:", max print "范围:", max - min } }' numbers.txt

9.时间计算技巧

9.1 时间差计算

# Time-difference helpers (mktime() requires GNU awk).
# The functions are defined inside the awk program text; awk cannot see
# definitions placed outside the quotes.
awk '
# Difference between two timestamps, in seconds
function time_diff(start, end) {
    return end - start
}

# Difference between two timestamps, in days (86400 seconds per day)
function days_diff(start, end) {
    return (end - start) / 86400
}

BEGIN {
    # 2024-01-01 and 2024-01-15
    start = mktime("2024 01 01 00 00 00")
    end = mktime("2024 01 15 00 00 00")
    print "相差秒数:", time_diff(start, end)
    print "相差天数:", days_diff(start, end)
}'

9.2 添加时间间隔

# Add a time interval to a timestamp (systime()/strftime() require GNU awk).
# The helpers are defined inside the awk program text so awk can call them.
awk '
# Return timestamp shifted forward by the given number of days
function add_days(timestamp, days) {
    return timestamp + days * 86400
}

# Return timestamp shifted forward by the given number of hours
function add_hours(timestamp, hours) {
    return timestamp + hours * 3600
}

BEGIN {
    now = systime()
    next_week = add_days(now, 7)
    print "现在:", strftime("%Y-%m-%d %H:%M:%S", now)
    print "一周后:", strftime("%Y-%m-%d %H:%M:%S", next_week)
}'

10.实战技巧

10.1 性能优化

# 1. 使用内置函数而不是自定义函数 # 2. 减少数组操作 # 3. 使用位运算代替数学运算（当适用时） # 判断奇偶数的优化 awk '{ # 传统方法 if ($1 % 2 == 0) print "偶数" # 位运算方法（更快） if (and($1, 1) == 0) print "偶数" }'

10.2 错误处理

# Defensive helpers. They are defined inside the awk program text; functions
# written outside the quotes are never seen by awk.
awk '
# Divide a by b. When b is 0, return default_value if one was supplied,
# otherwise the string "NaN".
function safe_divide(a, b, default_value) {
    if (b == 0) {
        return (default_value != "") ? default_value : "NaN"
    }
    return a / b
}

# Return str as a number when it compares equal to its own numeric value;
# otherwise return default_value, or 0 when none was supplied.
function safe_num(str, default_value) {
    if (str == str + 0) {
        return str + 0
    }
    return (default_value != "") ? default_value : 0
}

BEGIN {
    print safe_divide(10, 0, 0)    # prints 0
    print safe_divide(10, 2)       # prints 5
    print safe_num("123")          # 123
    print safe_num("abc", 0)       # 0
}'

11.综合示例：日志分析器

#!/usr/bin/awk -f
# Log analysis script. Requires GNU awk: match() is used with an array
# argument to capture sub-groups.
#
# Line format expected by the patterns below:
#   [HH:MM:SS] 状态:<成功|失败> 耗时:<milliseconds>
# Requests are grouped per minute (the HH:MM part of the timestamp).

BEGIN {
    printf("%-20s %-10s %-10s %-10s\n", "时间", "成功", "失败", "成功率")
    print "------------------------------------------------------"
}

{
    # Timestamp; lines without a [time] prefix cannot be bucketed, skip them
    if (!match($0, /\[([0-9:]+)\]/, time_arr))
        next
    time_key = substr(time_arr[1], 1, 5)    # keep hours and minutes only

    # Status; reset first so a line without one does not inherit the
    # status of the previous record
    status = ""
    if (match($0, /状态:([^ ]+)/, status_arr))
        status = status_arr[1]

    # Duration; accumulated per minute and overall
    if (match($0, /耗时:([0-9]+)/, duration_arr)) {
        duration = duration_arr[1] + 0
        total_time[time_key] += duration
        count[time_key]++
        total_time_all += duration
        count_all++
    }

    # Status counters; seen[] records every minute that had a request, so
    # minutes containing only failures are reported as well
    if (status == "成功") {
        success[time_key]++
        seen[time_key] = 1
    } else if (status == "失败") {
        failure[time_key]++
        seen[time_key] = 1
    }
}

END {
    # Per-minute statistics (every key in seen[] has at least one request)
    total_success = 0
    total_failure = 0
    for (t in seen) {
        total = success[t] + failure[t]
        rate = success[t] / total * 100
        avg_time = 0
        if (count[t] > 0)
            avg_time = total_time[t] / count[t]
        printf("%-20s %-10d %-10d %-9.1f%% (平均耗时:%.0fms)\n", t, success[t], failure[t], rate, avg_time)
        total_success += success[t]
        total_failure += failure[t]
    }

    # Overall statistics, guarded against empty input
    print "\n总体统计:"
    total = total_success + total_failure
    overall_rate = 0
    if (total > 0)
        overall_rate = total_success / total * 100
    overall_avg = 0
    if (count_all > 0)
        overall_avg = total_time_all / count_all
    printf("总请求数: %d\n", total)
    printf("成功率: %.1f%%\n", overall_rate)
    printf("平均耗时: %.0fms\n", overall_avg)
}

这些非字符串函数让awk成为一个强大的数据处理工具，特别适合数值计算、时间处理、统计分析等任务。掌握这些函数可以大大提高awk脚本的处理能力。

字符串函数

1.length() - 字符串长度

2.index() - 查找子串位置

3.substr() - 提取子串

4.split() - 分割字符串

5.match() - 正则匹配

6.sub() - 替换第一个匹配

7.gsub() - 全局替换

8.toupper() / tolower() - 大小写转换

9.sprintf() - 格式化字符串

10.gensub() - 通用替换（GNU awk特有）

11.strtonum() - 字符串转数字（GNU awk）

12.asort() / asorti() - 数组排序

13.patsplit() - 模式分割（GNU awk）

综合示例

使用技巧

链式调用

处理多行

非字符串函数

1.数值处理函数

1.1int()- 取整函数

1.2sqrt()- 平方根函数

1.3exp()- 指数函数

1.4log()- 自然对数

1.5sin(),cos(),atan2()- 三角函数

1.6rand()- 随机数生成

1.7srand()- 设置随机种子

2.时间函数（GNU awk）

2.1systime()- 当前时间戳

2.2mktime()- 构造时间戳

2.3strftime()- 格式化时间

3.数组函数

SUBSEP

3.1 数组操作技巧

3.2 删除数组元素

4.位操作函数（GNU awk）

4.1 基本位操作

5.类型判断函数

5.1 类型检测

6.I/O相关函数

6.1close()- 关闭文件/管道

6.2system()- 执行系统命令

7.getline函数详解

7.1 不同形式的getline

7.2 getline返回值

8.数学计算技巧

8.1 浮点数精度控制

8.2 统计计算

9.时间计算技巧

9.1 时间差计算

9.2 添加时间间隔

10.实战技巧

10.1 性能优化

10.2 错误处理

11.综合示例：日志分析器

DownKyi终极指南：B站视频下载全攻略，轻松获取8K超高清画质

YOLOv8森林防火监控：烟雾、火点早期视觉识别预警

jscope使用教程：通俗解释多通道同步原理

DownKyi完全攻略：解锁B站视频下载的实用指南

智能电话号码定位系统：一键查询手机号精准位置的开源解决方案

虚拟手柄模拟驱动终极指南：5分钟快速安装与完整使用教程

1.1`int()`- 取整函数

1.2`sqrt()`- 平方根函数

1.3`exp()`- 指数函数

1.4`log()`- 自然对数

1.5`sin()`,`cos()`,`atan2()`- 三角函数

1.6`rand()`- 随机数生成

1.7`srand()`- 设置随机种子

2.1`systime()`- 当前时间戳

2.2`mktime()`- 构造时间戳

2.3`strftime()`- 格式化时间

6.1`close()`- 关闭文件/管道

6.2`system()`- 执行系统命令