分布式系统测试:缓存、注册中心与链路追踪验证
上篇咱们搞定了消息队列测试,今天继续深入分布式系统的其他组件——Redis缓存、服务注册中心、分布式链路追踪。这些"基础设施"的测试往往被忽略,但出了问题定位起来最头疼。
一、Redis 缓存测试
缓存测试的核心问题:缓存命中、缓存穿透、缓存雪崩、数据一致性。
场景:订单详情缓存
@ServicepublicclassOrderQueryService{@AutowiredprivateOrderRepositoryorderRepository;@AutowiredprivateStringRedisTemplateredisTemplate;privatestaticfinalStringORDER_CACHE_KEY="order:%s";privatestaticfinallongCACHE_TTL=30;// 30分钟publicOrdergetOrder(LongorderId){Stringkey=String.format(ORDER_CACHE_KEY,orderId);// 1. 查缓存Stringcached=redisTemplate.opsForValue().get(key);if(cached!=null){returnJSON.parseObject(cached,Order.class);}// 2. 查数据库Orderorder=orderRepository.findById(orderId).orElseThrow(()->newOrderNotFoundException(orderId));// 3. 写缓存redisTemplate.opsForValue().set(key,JSON.toJSONString(order),CACHE_TTL,TimeUnit.MINUTES);returnorder;}@CacheEvict(key="'order:' + #orderId")publicvoidupdateOrder(LongorderId,OrderUpdateRequestrequest){// 更新数据库...// 缓存由@CacheEvict自动删除}}测试方案:Testcontainers Redis
<dependency><groupId>com.redis.testcontainers</groupId><artifactId>testcontainers-redis-junit</artifactId><version>2.2.0</version><scope>test</scope></dependency>@SpringBootTest@TestcontainersclassOrderCacheTest{@ContainerstaticRedisContainerredis=newRedisContainer(DockerImageName.parse("redis:7-alpine"));@DynamicPropertySourcestaticvoidconfigureRedis(DynamicPropertyRegistryregistry){registry.add("spring.data.redis.host",redis::getHost);registry.add("spring.data.redis.port",redis::getMappedPort(6379));}@AutowiredOrderQueryServicequeryService;@AutowiredOrderRepositoryorderRepository;@AutowiredStringRedisTemplateredisTemplate;@BeforeEachvoidsetUp(){// 清空缓存redisTemplate.getConnectionFactory().getConnection().flushAll();}@Test@DisplayName("首次查询:缓存未命中,查数据库并写入缓存")voidshouldQueryDBAndCacheOnFirstAccess(){// Given: 数据库有数据Orderorder=orderRepository.save(newOrder(1L,"ITEM-001",newBigDecimal("99.99")));// When: 第一次查询Orderresult=queryService.getOrder(order.getId());// Then: 返回正确数据assertThat(result.getId()).isEqualTo(order.getId());// Then: 缓存已写入Stringcached=redisTemplate.opsForValue().get("order:"+order.getId());assertThat(cached).isNotNull();assertThat(cached).contains("ITEM-001");}@Test@DisplayName("二次查询:缓存命中,不查数据库")voidshouldHitCacheOnSecondAccess(){// Given: 数据已在缓存Orderorder=orderRepository.save(newOrder(1L,"ITEM-001",newBigDecimal("99.99")));queryService.getOrder(order.getId());// 预热缓存// When: 再次查询Orderresult=queryService.getOrder(order.getId());// Then: 结果正确(虽然没有直接验证"没查DB",但可以通过监控验证)assertThat(result.getSku()).isEqualTo("ITEM-001");}@Test@DisplayName("更新订单后,缓存失效")voidshouldInvalidateCacheOnUpdate(){// Given: 缓存已有数据Orderorder=orderRepository.save(newOrder(1L,"ITEM-001",newBigDecimal("99.99")));queryService.getOrder(order.getId());// 写缓存// When: 更新订单queryService.updateOrder(order.getId(),newOrderUpdateRequest("ITEM-002"));// Then: 
缓存已删除Stringcached=redisTemplate.opsForValue().get("order:"+order.getId());assertThat(cached).isNull();}@Test@DisplayName("缓存过期后,重新查数据库")voidshouldQueryDBAfterCacheExpire()throwsInterruptedException{// Given: 写入缓存(TTL设短一点方便测试)Orderorder=orderRepository.save(newOrder(1L,"ITEM-001",newBigDecimal("99.99")));queryService.getOrder(order.getId());// When: 等待缓存过期(测试中可以把TTL设为1秒)Thread.sleep(2000);// Then: 缓存已过期Stringcached=redisTemplate.opsForValue().get("order:"+order.getId());assertThat(cached).isNull();// 再次查询应该重新查DBOrderresult=queryService.getOrder(order.getId());assertThat(result).isNotNull();}}缓存穿透测试
@Test@DisplayName("查询不存在的订单,不缓存空值(防穿透)")voidshouldNotCacheNullResult(){// When: 查询不存在的订单assertThatThrownBy(()->queryService.getOrder(99999L)).isInstanceOf(OrderNotFoundException.class);// Then: 不应该缓存空值(否则恶意请求会压垮DB)Stringcached=redisTemplate.opsForValue().get("order:99999");assertThat(cached).isNull();}@Test@DisplayName("查询不存在的订单,缓存空值(布隆过滤器方案)")voidshouldCacheNullWithShortTTL(){// 另一种方案:缓存空值,但TTL很短(比如1分钟)// 验证空值缓存的TTL}二、服务注册中心测试
微服务通过注册中心(Nacos/Eureka/Consul)互相发现。测试中需要验证:服务是否正确注册、是否能被发现、故障时是否剔除。
Nacos 测试
@SpringBootTest(webEnvironment=SpringBootTest.WebEnvironment.RANDOM_PORT)@TestcontainersclassServiceDiscoveryTest{@ContainerstaticGenericContainer<?>nacos=newGenericContainer<>(DockerImageName.parse("nacos/nacos-server:v2.2.3")).withEnv("MODE","standalone").withExposedPorts(8848).waitingFor(Wait.forHttp("/nacos").forStatusCode(200));@DynamicPropertySourcestaticvoidconfigureNacos(DynamicPropertyRegistryregistry){StringnacosUrl=String.format("http://%s:%d",nacos.getHost(),nacos.getMappedPort(8848));registry.add("spring.cloud.nacos.discovery.server-addr",()->nacosUrl);registry.add("spring.cloud.nacos.config.server-addr",()->nacosUrl);}@AutowiredDiscoveryClientdiscoveryClient;@Test@DisplayName("服务启动后自动注册到Nacos")voidshouldRegisterToNacos(){// 等待注册完成await().atMost(Duration.ofSeconds(30)).pollInterval(Duration.ofSeconds(1)).untilAsserted(()->{List<ServiceInstance>instances=discoveryClient.getInstances("order-service");assertThat(instances).isNotEmpty();});}@Test@DisplayName("能从Nacos发现用户服务")voidshouldDiscoverUserService(){// 先手动注册一个用户服务实例(模拟)registerMockService("user-service","localhost",8081);// 验证能发现List<ServiceInstance>instances=discoveryClient.getInstances("user-service");assertThat(instances).hasSize(1);assertThat(instances.get(0).getHost()).isEqualTo("localhost");}}三、分布式链路追踪测试
链路追踪(Spring Boot 2 时代用 Spring Cloud Sleuth,Spring Boot 3 起改用 Micrometer Tracing,后端可接 Zipkin/Jaeger)能帮你追踪请求在多个服务间的流转。测试中需要验证:TraceId是否正确传递、Span是否完整、链路数据是否正确上报。
场景:验证TraceId传递
@SpringBootTest(webEnvironment=SpringBootTest.WebEnvironment.RANDOM_PORT)classTracingTest{@AutowiredTestRestTemplaterestTemplate;@AutowiredTracertracer;// Micrometer Tracing@Test@DisplayName("HTTP请求携带TraceId,并在服务间传递")voidshouldPropagateTraceId(){// When: 发送请求(带自定义TraceId)StringcustomTraceId="abc123";ResponseEntity<String>response=restTemplate.exchange("/api/orders/1",HttpMethod.GET,newHttpEntity<>(Map.of("X-B3-TraceId",customTraceId)),String.class);// Then: 响应中应该包含Trace信息assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);// 验证日志中包含了TraceId// 可以通过Appender捕获日志验证}@Test@DisplayName("异步任务继承父Span的TraceId")voidshouldInheritTraceInAsyncTask()throwsException{// Given: 当前有活跃的SpanSpanparentSpan=tracer.nextSpan().name("parent-operation").start();try(Tracer.SpanInScopews=tracer.withSpanInScope(parentSpan)){// When: 提交异步任务CompletableFuture<String>future=CompletableFuture.supplyAsync(()->{// Then: 异步线程中应该能获取到相同的TraceIdSpancurrentSpan=tracer.currentSpan();assertThat(currentSpan).isNotNull();assertThat(currentSpan.context().traceId()).isEqualTo(parentSpan.context().traceId());return"done";});future.get(5,TimeUnit.SECONDS);}finally{parentSpan.end();}}}Zipkin 验证
@TestcontainersclassZipkinIntegrationTest{@ContainerstaticGenericContainer<?>zipkin=newGenericContainer<>(DockerImageName.parse("openzipkin/zipkin:2.24")).withExposedPorts(9411);@Test@DisplayName("链路数据正确上报到Zipkin")voidshouldReportTracesToZipkin(){// 触发一个跨服务请求orderService.createOrder(request);// 等待数据上报await().atMost(Duration.ofSeconds(10)).untilAsserted(()->{// 查询Zipkin API验证Trace存在StringzipkinUrl=String.format("http://%s:%d",zipkin.getHost(),zipkin.getMappedPort(9411));ResponseEntity<String>response=restTemplate.getForEntity(zipkinUrl+"/api/v2/traces?serviceName=order-service",String.class);assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);assertThat(response.getBody()).contains("order-service");});}}四、混沌测试入门
分布式系统测试的终极形态——故意搞破坏,看系统能不能扛住。
简单实现:手动停掉依赖容器
@Test@DisplayName("Redis故障时,服务应该降级查数据库")voidshouldFallbackWhenRedisDown(){// Given: 缓存已有数据Orderorder=queryService.getOrder(1L);assertThat(order).isNotNull();// When: 杀掉Redis容器redis.stop();// Then: 服务应该降级查数据库,不抛异常OrderfallbackOrder=queryService.getOrder(1L);assertThat(fallbackOrder).isNotNull();assertThat(fallbackOrder.getId()).isEqualTo(1L);// 恢复Redisredis.start();}专业工具:Chaos Monkey for Spring Boot
<dependency><groupId>de.codecentric</groupId><artifactId>chaos-monkey-spring-boot</artifactId><version>3.0.2</version><scope>test</scope></dependency># application-chaos.ymlchaos:monkey:enabled:trueassaults:level:3# 攻击强度 1-10latency-active:truelatency-range-start:1000latency-range-end:3000exceptions-active:trueexception:type:java.io.IOExceptionargument:"模拟IO异常"@SpringBootTest@ActiveProfiles("chaos")classChaosTest{@AutowiredOrderServiceorderService;@Test@DisplayName("在混沌攻击下,核心流程仍然可用")voidshouldSurviveChaos(){// 即使服务被注入延迟和异常,核心功能应该仍然可用// 验证降级、熔断、重试机制是否生效for(inti=0;i<10;i++){try{OrderResultresult=orderService.createOrder(request);// 记录成功/失败}catch(Exceptione){// 验证是预期的异常类型assertThat(e).isInstanceOfAny(ServiceUnavailableException.class,TimeoutException.class);}}}}五、小结
今天咱们聊了分布式系统的测试:
| 组件 | 测试重点 | 工具 |
|---|---|---|
| Redis缓存 | 命中/穿透/雪崩/一致性 | Testcontainers Redis |
| 注册中心 | 服务注册/发现/剔除 | Testcontainers Nacos |
| 链路追踪 | TraceId传递/Span完整性 | Micrometer Tracing + Zipkin |
| 混沌测试 | 故障降级/熔断/恢复 | Chaos Monkey |
一句话总结:分布式系统的测试不能只验证"正常情况",缓存穿透、服务故障、网络延迟这些"异常场景"才是价值所在。Testcontainers让你能在测试中真实模拟这些场景。