Python常用算法解析，从优雅简洁到高效实战-编程阁

一、Python算法哲学：优雅与效率的平衡

1.1 Python算法的核心优势

1.2 Python的内置算法工具

二、列表与字典算法

2.1 列表推导式的艺术

2.2 字典的高级用法

三、排序与搜索算法

3.1 内置排序的灵活运用

3.2 二分查找与二等分模块

四、图形算法与网络分析

4.1 使用networkx进行图分析

4.2 自定义图算法实现

五、数值计算与科学计算

5.1 使用NumPy进行数值计算

5.2 使用pandas进行数据分析

六、机器学习算法

6.1 使用 scikit-learn

6.2 自定义机器学习算法

如果您喜欢此文章，请收藏、点赞、评论，谢谢，祝您快乐每一天。

一、Python算法哲学：优雅与效率的平衡

1.1 Python算法的核心优势

Python算法设计融合了概率编程范式：

函数式编程：map、filter、reduce、列表推导式
面向对象：自定义数据结构、操作重载
元编程：装饰器、元类、外汇
动态特性：鸭子类型、运行时类型检查

1.2 Python的内置算法工具

# 1. 内置函数（BIFs）中的算法
numbers = [3, 1, 4, 1, 5, 9, 2, 6]

# 排序相关
sorted_nums = sorted(numbers) # 返回新列表
numbers.sort() # 原地排序
reversed_nums = list(reversed(numbers)) # 反转

# 极值查找
max_val = max(numbers)
min_val = min(numbers)
sum_val = sum(numbers)

# 2. 使用key参数进行复杂排序
words = ["apple", "banana", "cherry", "date"]
sorted_words = sorted(words, key=len) # 按长度排序
sorted_words = sorted(words, key=str.lower) # 不区分大小写

# 3. 使用lambda表达式
students = [("Alice", 85), ("Bob", 92), ("Charlie", 78)]
sorted_students = sorted(students, key=lambda x: x[1], reverse=True)

二、列表与字典算法

2.1 列表推导式的艺术

# 1. 基本列表推导式
squares = [x**2 for x in range(10)] # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# 2. 带条件的列表推导式
even_squares = [x**2 for x in range(10) if x % 2 == 0] # 偶数的平方

# 3. 嵌套列表推导式
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = [num for row in matrix for num in row] # 展平二维列表

# 4. 字典推导式
squares_dict = {x: x**2 for x in range(5)} # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# 5. 集合推导式
unique_chars = {char for char in "hello world" if char != ' '} # {'h', 'e', 'l', 'o', 'w', 'r', 'd'}

# 6. 生成器表达式（惰性求值）
large_squares = (x**2 for x in range(1000000)) # 不立即生成所有值
sum_of_squares = sum(x**2 for x in range(1000)) # 内存高效

2.2 字典的高级用法

# 1. defaultdict：自动初始化字典
from collections import defaultdict

# 统计单词出现次数
word_counts = defaultdict(int)
for word in text.split():
word_counts[word] += 1

# 按首字母分组
words_by_letter = defaultdict(list)
for word in words:
words_by_letter[word[0]].append(word)

# 2. Counter：计数专用字典
from collections import Counter

# 统计元素出现次数
colors = ['red', 'blue', 'red', 'green', 'blue', 'blue']
color_counts = Counter(colors) # Counter({'blue': 3, 'red': 2, 'green': 1})

# 最常见的n个元素
most_common = color_counts.most_common(2) # [('blue', 3), ('red', 2)]

# 3. OrderedDict：保持插入顺序（Python 3.7+普通dict已有序）
from collections import OrderedDict

# LRU缓存实现
class LRUCache:
def __init__(self, capacity: int):
self.cache = OrderedDict()
self.capacity = capacity

def get(self, key):
if key not in self.cache:
return -1
self.cache.move_to_end(key) # 移动到末尾表示最近使用
return self.cache[key]

def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
self.cache[key] = value
if len(self.cache) > self.capacity:
self.cache.popitem(last=False) # 移除最旧的项

三、排序与搜索算法

3.1 内置排序的灵活运用

# 1. 复杂对象的排序
class Student:
def __init__(self, name, grade, age):
self.name = name
self.grade = grade
self.age = age

def __repr__(self):
return f"Student({self.name}, {self.grade}, {self.age})"

students = [
Student('Alice', 'A', 20),
Student('Bob', 'B', 19),
Student('Charlie', 'A', 21),
Student('David', 'C', 20)
]

# 按多个字段排序
sorted_students = sorted(students, key=lambda s: (s.grade, s.age))

# 2. 使用attrgetter和itemgetter
from operator import attrgetter, itemgetter

# attrgetter用于对象属性
sorted_by_age = sorted(students, key=attrgetter('age'))

# itemgetter用于元组或字典
data = [{'name': 'Alice', 'score': 85},
{'name': 'Bob', 'score': 92},
{'name': 'Charlie', 'score': 78}]
sorted_data = sorted(data, key=itemgetter('score'), reverse=True)

# 3. 稳定排序的特性
# Python的sorted()是稳定排序，相等元素的相对顺序保持不变
items = [('red', 1), ('blue', 1), ('red', 2), ('blue', 2)]
sorted_items = sorted(items, key=lambda x: x[0])
# 结果：[('blue', 1), ('blue', 2), ('red', 1), ('red', 2)]
# 相同颜色的元素保持了原始顺序

3.2 二分查找与二等分模块

import bisect

# 1. 基本二分查找
sorted_list = [1, 3, 5, 7, 9, 11, 13]

# 查找元素位置（返回插入位置）
position = bisect.bisect_left(sorted_list, 7) # 3
position = bisect.bisect_right(sorted_list, 7) # 4（重复元素时有用）

# 2. 插入元素保持有序
bisect.insort_left(sorted_list, 6) # 在适当位置插入6
bisect.insort_right(sorted_list, 8) # 插入8

# 3. 自定义比较函数的二分查找
def binary_search_custom(arr, target, key_func):
"""使用自定义key函数的二分查找"""
lo, hi = 0, len(arr)
while lo < hi:
mid = (lo + hi) // 2
if key_func(arr[mid]) < target:
lo = mid + 1
else:
hi = mid
return lo if lo < len(arr) and key_func(arr[lo]) == target else -1

# 4. 查找范围
def find_range(arr, target):
"""在有序数组中查找目标值的起始和结束位置"""
left = bisect.bisect_left(arr, target)
right = bisect.bisect_right(arr, target)
return (left, right) if left < right else (-1, -1)

# 示例
nums = [5, 7, 7, 8, 8, 8, 10]
start, end = find_range(nums, 8) # (3, 6)

四、图形算法与网络分析

4.1 使用networkx进行图分析

import networkx as nx
import matplotlib.pyplot as plt

# 1. 创建图
G = nx.Graph() # 无向图
# G = nx.DiGraph() # 有向图

# 添加节点和边
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4), (4, 5)])

# 2. 基本图算法
# 最短路径
shortest_path = nx.shortest_path(G, source=1, target=5) # [1, 3, 4, 5]

# 连通分量
connected_components = list(nx.connected_components(G))

# 度中心性
degree_centrality = nx.degree_centrality(G)

# 3. PageRank算法
pagerank_scores = nx.pagerank(G, alpha=0.85)

# 4. 社区检测
from networkx.algorithms import community

# Louvain方法
communities = community.greedy_modularity_communities(G)

# 5. 可视化
pos = nx.spring_layout(G) # 布局算法
nx.draw(G, pos, with_labels=True, node_color='lightblue',
node_size=500, font_size=10)
plt.show()

4.2 自定义图算法实现

from collections import deque, defaultdict

# 1. BFS（广度优先搜索）
def bfs(graph, start):
"""图的广度优先遍历"""
visited = set()
queue = deque([start])
result = []

while queue:
vertex = queue.popleft()
if vertex not in visited:
visited.add(vertex)
result.append(vertex)
queue.extend(graph[vertex] - visited)

return result

# 2. DFS（深度优先搜索）- 递归版本
def dfs_recursive(graph, vertex, visited=None):
if visited is None:
visited = set()
visited.add(vertex)

for neighbor in graph[vertex]:
if neighbor not in visited:
dfs_recursive(graph, neighbor, visited)

return visited

# 3. Dijkstra算法
import heapq

def dijkstra(graph, start):
"""单源最短路径 - Dijkstra算法"""
distances = {vertex: float('infinity') for vertex in graph}
distances[start] = 0
pq = [(0, start)] # 优先队列

while pq:
current_distance, current_vertex = heapq.heappop(pq)

if current_distance > distances[current_vertex]:
continue

for neighbor, weight in graph[current_vertex].items():
distance = current_distance + weight

if distance < distances[neighbor]:
distances[neighbor] = distance
heapq.heappush(pq, (distance, neighbor))

return distances

# 4. 拓扑排序
def topological_sort(graph):
"""有向无环图的拓扑排序"""
in_degree = {u: 0 for u in graph}

# 计算入度
for u in graph:
for v in graph[u]:
in_degree[v] += 1

# 收集入度为0的节点
queue = deque([u for u in graph if in_degree[u] == 0])
result = []

while queue:
u = queue.popleft()
result.append(u)

for v in graph[u]:
in_degree[v] -= 1
if in_degree[v] == 0:
queue.append(v)

if len(result) != len(graph):
raise ValueError("图中存在环")

return result

五、数值计算与科学计算

5.1 使用NumPy进行数值计算

import numpy as np

# 1. 数组创建和操作
arr = np.array([1, 2, 3, 4, 5])
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# 2. 向量化操作（避免Python循环）
# 传统Python方式
squares = [x**2 for x in range(1000000)] # 慢

# NumPy向量化方式
squares_np = np.arange(1000000) ** 2 # 快

# 3. 广播机制
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])
result = a + b # 广播：[[11, 22, 33], [14, 25, 36]]

# 4. 线性代数运算
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

dot_product = np.dot(A, B) # 矩阵乘法
determinant = np.linalg.det(A) # 行列式
eigenvalues, eigenvectors = np.linalg.eig(A) # 特征值和特征向量

# 5. 统计计算
data = np.random.randn(1000, 10) # 1000行10列的随机数据
mean = np.mean(data, axis=0) # 每列的平均值
std = np.std(data, axis=0) # 每列的标准差
correlation = np.corrcoef(data.T) # 相关系数矩阵

5.2 使用pandas进行数据分析

import pandas as pd
import numpy as np

# 1. 创建DataFrame
data = {
'Name': ['Alice', 'Bob', 'Charlie', 'David'],
'Age': [25, 30, 35, 40],
'Salary': [50000, 60000, 70000, 80000],
'Department': ['HR', 'Engineering', 'Engineering', 'HR']
}
df = pd.DataFrame(data)

# 2. 数据筛选和查询
# 基本筛选
engineering_df = df[df['Department'] == 'Engineering']
high_salary = df[df['Salary'] > 60000]

# 复杂查询
result = df.query('Age > 30 and Salary < 75000')

# 3. 分组聚合
grouped = df.groupby('Department')
summary = grouped.agg({
'Age': ['mean', 'min', 'max'],
'Salary': ['sum', 'mean', 'std']
})

# 4. 数据透视表
pivot = pd.pivot_table(df,
values='Salary',
index='Department',
columns=None,
aggfunc=['mean', 'count', 'sum'])

# 5. 时间序列处理
dates = pd.date_range('2023-01-01', periods=100, freq='D')
time_series = pd.Series(np.random.randn(100), index=dates)

# 重采样
monthly_avg = time_series.resample('M').mean()
rolling_avg = time_series.rolling(window=7).mean() # 7天移动平均

六、机器学习算法

6.1 使用 scikit-learn

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# 1. 加载数据
iris = load_iris()
X, y = iris.data, iris.target

# 2. 数据预处理
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(
X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# 4. 训练模型
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# 5. 模型评估
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# 6. 交叉验证
cv_scores = cross_val_score(clf, X_scaled, y, cv=5)
print(f"交叉验证平均得分: {cv_scores.mean():.3f}")

# 7. 特征重要性
feature_importances = clf.feature_importances_
for name, importance in zip(iris.feature_names, feature_importances):
print(f"{name}: {importance:.3f}")

6.2 自定义机器学习算法

import numpy as np
from collections import Counter

class KNN:
"""K近邻算法实现"""
def __init__(self, k=3):
self.k = k

def fit(self, X, y):
self.X_train = X
self.y_train = y

def predict(self, X):
predictions = [self._predict(x) for x in X]
return np.array(predictions)

def _predict(self, x):
# 计算距离
distances = [np.linalg.norm(x - x_train) for x_train in self.X_train]

# 获取k个最近邻的索引
k_indices = np.argsort(distances)[:self.k]

# 获取k个最近邻的标签
k_nearest_labels = [self.y_train[i] for i in k_indices]

# 多数投票
most_common = Counter(k_nearest_labels).most_common(1)
return most_common[0][0]

class LinearRegression:
"""线性回归实现（梯度下降）"""
def __init__(self, learning_rate=0.01, n_iterations=1000):
self.learning_rate = learning_rate
self.n_iterations = n_iterations
self.weights = None
self.bias = None

def fit(self, X, y):
n_samples, n_features = X.shape

# 初始化参数
self.weights = np.zeros(n_features)
self.bias = 0

# 梯度下降
for _ in range(self.n_iterations):
# 预测
y_pred = np.dot(X, self.weights) + self.bias

# 计算梯度
dw = (1/n_samples) * np.dot(X.T, (y_pred - y))
db = (1/n_samples) * np.sum(y_pred - y)

# 更新参数
self.weights -= self.learning_rate * dw
self.bias -= self.learning_rate * db

def predict(self, X):
return np.dot(X, self.weights) + self.bias