
Deep Learning Homework 10: Code

张小明 · Front-end Developer

NumPy implementation
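
Both implementations compute the standard RNN cell. In the notation of the code, the forward step is

$$a^{\langle t\rangle} = \tanh\!\left(W_{ax}\,x^{\langle t\rangle} + W_{aa}\,a^{\langle t-1\rangle} + b_a\right), \qquad \hat{y}^{\langle t\rangle} = \mathrm{softmax}\!\left(W_{ya}\,a^{\langle t\rangle} + b_y\right)$$

and the backward step, writing $dz = \left(1 - (a^{\langle t\rangle})^2\right) \odot da^{\langle t\rangle}$ for the gradient through the tanh, is

$$dW_{ax} = dz\,x^{\langle t\rangle\top}, \quad dW_{aa} = dz\,a^{\langle t-1\rangle\top}, \quad db_a = \sum_{\text{batch}} dz, \quad dx^{\langle t\rangle} = W_{ax}^{\top} dz, \quad da^{\langle t-1\rangle} = W_{aa}^{\top} dz.$$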

import numpy as np

# Numerically stable softmax.
def softmax(x, axis=None):
    x_max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x - x_max)
    sum_e_x = np.sum(e_x, axis=axis, keepdims=True)
    return e_x / sum_e_x

# Forward pass for a single time step.
def rnn_cell_forward(xt, a_prev, param):
    Wax = param['Wax']
    Waa = param['Waa']
    Wya = param['Wya']
    ba = param['ba']
    by = param['by']
    a_next = np.tanh(np.dot(Wax, xt) + np.dot(Waa, a_prev) + ba)
    # Normalize over the class dimension (axis=0), matching dim=0 in the
    # PyTorch version below; the original call relied on the axis=None default.
    yt_pred = softmax(np.dot(Wya, a_next) + by, axis=0)
    cache = (a_next, a_prev, xt, param)
    return a_next, yt_pred, cache

np.random.seed(1)
xt = np.random.randn(3, 10)      # (n_x, m): input size, batch size
a_prev = np.random.randn(5, 10)  # (n_a, m): hidden-state size, batch size
Waa = np.random.randn(5, 5)      # (n_a, n_a) U
Wax = np.random.randn(5, 3)      # (n_a, n_x) W
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)
by = np.random.randn(2, 1)
param = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a_next, yt_pred, cache = rnn_cell_forward(xt, a_prev, param)
print("a_next[4] = ", a_next[4])
print("a_next shape = ", a_next.shape)
print("yt_pred[1] =", yt_pred[1])
print("yt_pred shape =", yt_pred.shape)

# Forward pass over the whole sequence.
def rnn_forward(x, a0, param):
    caches = []
    n_x, m, T_x = x.shape
    n_y, n_a = param["Wya"].shape
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))
    a_next = a0
    for t in range(T_x):
        a_next, yt_pred, cache = rnn_cell_forward(x[:, :, t], a_next, param)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        caches.append(cache)
    caches = (caches, x)
    return a, y_pred, caches

np.random.seed(1)
x = np.random.randn(3, 10, 4)
a0 = np.random.randn(5, 10)
Waa = np.random.randn(5, 5)
Wax = np.random.randn(5, 3)
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)
by = np.random.randn(2, 1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward(x, a0, parameters)
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("y_pred[1][3] =", y_pred[1][3])
print("y_pred.shape = ", y_pred.shape)
print("caches[1][1][3] =", caches[1][1][3])
print("len(caches) = ", len(caches))

# Backward pass for a single time step.
def rnn_cell_backward(da_next, cache):
    (a_next, a_prev, xt, param) = cache
    Wax = param['Wax']
    Waa = param['Waa']
    dtanh = (1 - a_next * a_next) * da_next  # tanh'(z) = 1 - tanh(z)^2
    dxt = np.dot(Wax.T, dtanh)
    dWax = np.dot(dtanh, xt.T)
    da_prev = np.dot(Waa.T, dtanh)
    dWaa = np.dot(dtanh, a_prev.T)
    dba = np.sum(dtanh, axis=-1, keepdims=True)  # sum over the batch
    gradients = {"dxt": dxt, "dWax": dWax, "da_prev": da_prev,
                 "dWaa": dWaa, "dba": dba}
    return gradients

np.random.seed(1)
xt = np.random.randn(3, 10)
a_prev = np.random.randn(5, 10)
Wax = np.random.randn(5, 3)
Waa = np.random.randn(5, 5)
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)  # the original assigned this to `b`, silently reusing the stale `ba`
by = np.random.randn(2, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "ba": ba, "by": by}

a_next, yt, cache = rnn_cell_forward(xt, a_prev, parameters)
da_next = np.random.randn(5, 10)
gradients = rnn_cell_backward(da_next, cache)
print('gradients["dxt"][1][2] =', gradients["dxt"][1][2])
print('gradients["dxt"].shape =', gradients["dxt"].shape)
print('gradients["da_prev"][2][3] =', gradients["da_prev"][2][3])
print('gradients["da_prev"].shape =', gradients["da_prev"].shape)
print('gradients["dWax"][3][1] =', gradients["dWax"][3][1])
print('gradients["dWax"].shape =', gradients["dWax"].shape)
print('gradients["dWaa"][1][2] =', gradients["dWaa"][1][2])
print('gradients["dWaa"].shape =', gradients["dWaa"].shape)
print('gradients["dba"][4] =', gradients["dba"][4])
print('gradients["dba"].shape =', gradients["dba"].shape)

# Backward pass through the whole sequence.
def rnn_backward(da, caches):
    caches, x = caches
    a1, a0, x1, param = caches[0]
    n_a, m, T_x = da.shape
    n_x, m = x1.shape
    dx = np.zeros((n_x, m, T_x))
    dWax = np.zeros((n_a, n_x))
    dWaa = np.zeros((n_a, n_a))
    dba = np.zeros((n_a, 1))
    da0 = np.zeros((n_a, m))
    da_prevt = np.zeros((n_a, m))
    for t in reversed(range(T_x)):
        # Gradient reaching step t: upstream da plus the gradient from step t+1.
        grads = rnn_cell_backward(da[:, :, t] + da_prevt, caches[t])
        dxt, da_prevt = grads["dxt"], grads["da_prev"]
        dx[:, :, t] = dxt
        dWax += grads["dWax"]  # parameter gradients accumulate over time steps
        dWaa += grads["dWaa"]
        dba += grads["dba"]
    da0 = da_prevt
    gradients = {"dx": dx, "da0": da0, "dWax": dWax, "dWaa": dWaa, "dba": dba}
    return gradients

np.random.seed(1)
x = np.random.randn(3, 10, 4)
a0 = np.random.randn(5, 10)
Wax = np.random.randn(5, 3)
Waa = np.random.randn(5, 5)
Wya = np.random.randn(2, 5)
ba = np.random.randn(5, 1)
by = np.random.randn(2, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "ba": ba, "by": by}

a, y, caches = rnn_forward(x, a0, parameters)
da = np.random.randn(5, 10, 4)
gradients = rnn_backward(da, caches)
print('gradients["dx"][1][2] =', gradients["dx"][1][2])
print('gradients["dx"].shape =', gradients["dx"].shape)
print('gradients["da0"][2][3] =', gradients["da0"][2][3])
print('gradients["da0"].shape =', gradients["da0"].shape)
print('gradients["dWax"][3][1] =', gradients["dWax"][3][1])
print('gradients["dWax"].shape =', gradients["dWax"].shape)
print('gradients["dWaa"][1][2] =', gradients["dWaa"][1][2])
print('gradients["dWaa"].shape =', gradients["dWaa"].shape)
print('gradients["dba"][4] =', gradients["dba"][4])
print('gradients["dba"].shape =', gradients["dba"].shape)
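
As a quick sanity check of rnn_cell_backward, the analytic gradients can be compared against a centered finite difference. The sketch below is an addition (the _c names are illustrative, not from the assignment); it uses the surrogate loss L = sum(a_next * da_next), whose gradient with respect to a_next is exactly da_next, so the analytic dWax should match the numeric estimate.

# Sanity-check sketch (an addition): finite difference vs. analytic dWax[0, 0].
np.random.seed(2)
xt_c = np.random.randn(3, 10)
a_prev_c = np.random.randn(5, 10)
da_next_c = np.random.randn(5, 10)
param_c = {"Wax": np.random.randn(5, 3), "Waa": np.random.randn(5, 5),
           "Wya": np.random.randn(2, 5), "ba": np.random.randn(5, 1),
           "by": np.random.randn(2, 1)}

a_next_c, _, cache_c = rnn_cell_forward(xt_c, a_prev_c, param_c)
analytic = rnn_cell_backward(da_next_c, cache_c)["dWax"][0, 0]

# Centered difference of L = sum(a_next * da_next) w.r.t. Wax[0, 0].
eps = 1e-6
param_c["Wax"][0, 0] += eps
a_plus, _, _ = rnn_cell_forward(xt_c, a_prev_c, param_c)
param_c["Wax"][0, 0] -= 2 * eps
a_minus, _, _ = rnn_cell_forward(xt_c, a_prev_c, param_c)
param_c["Wax"][0, 0] += eps  # restore the original weight
numeric = (np.sum(a_plus * da_next_c) - np.sum(a_minus * da_next_c)) / (2 * eps)

print("analytic dWax[0,0] =", analytic)
print("numeric dWax[0,0] =", numeric)  # the two should agree to several decimal places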

PyTorch implementation
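
Before the line-by-line port below, note that this tanh recurrence is exactly what the built-in torch.nn.RNNCell computes, up to tensor layout: the built-in cell works on (batch, feature) tensors, while the port keeps the (feature, batch) layout of the NumPy version. A minimal self-contained sketch (an illustration added here, with made-up variable names):

import torch
import torch.nn as nn

# torch.nn.RNNCell computes h' = tanh(W_ih x + b_ih + W_hh h + b_hh), the same
# recurrence as rnn_cell_forward, but in (batch, feature) layout.
torch.manual_seed(0)
n_x, n_a, m = 3, 5, 10
xt = torch.randn(n_x, m)
a_prev = torch.randn(n_a, m)
Wax, Waa, ba = torch.randn(n_a, n_x), torch.randn(n_a, n_a), torch.randn(n_a, 1)

cell = nn.RNNCell(input_size=n_x, hidden_size=n_a)
with torch.no_grad():
    cell.weight_ih.copy_(Wax)           # (n_a, n_x)
    cell.weight_hh.copy_(Waa)           # (n_a, n_a)
    cell.bias_ih.copy_(ba.squeeze(1))   # (n_a,)
    cell.bias_hh.zero_()

manual = torch.tanh(Wax @ xt + Waa @ a_prev + ba)
builtin = cell(xt.T, a_prev.T).T        # transpose to and from (batch, feature)
print("max abs diff:", (manual - builtin).abs().max().item())  # ~1e-7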

import numpy as np
import torch
import torch.nn.functional as F

# Forward pass for a single time step.
def rnn_cell_forward_torch(xt, a_prev, param):
    Wax = param['Wax']
    Waa = param['Waa']
    Wya = param['Wya']
    ba = param['ba']
    by = param['by']
    a_next = torch.tanh(torch.matmul(Wax, xt) + torch.matmul(Waa, a_prev) + ba)
    yt_pred = F.softmax(torch.matmul(Wya, a_next) + by, dim=0)
    cache = (a_next, a_prev, xt, param)
    return a_next, yt_pred, cache

np.random.seed(1)
xt = torch.Tensor(np.random.randn(3, 10))      # (n_x, m): input size, batch size
a_prev = torch.Tensor(np.random.randn(5, 10))  # (n_a, m): hidden-state size, batch size
Waa = torch.Tensor(np.random.randn(5, 5))      # (n_a, n_a) U
Wax = torch.Tensor(np.random.randn(5, 3))      # (n_a, n_x) W
Wya = torch.Tensor(np.random.randn(2, 5))
ba = torch.Tensor(np.random.randn(5, 1))
by = torch.Tensor(np.random.randn(2, 1))
param = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a_next, yt_pred, cache = rnn_cell_forward_torch(xt, a_prev, param)
print("a_next[4] = ", a_next[4])
print("a_next shape = ", a_next.shape)
print("yt_pred[1] =", yt_pred[1])
print("yt_pred shape =", yt_pred.shape)

# Forward pass over the whole sequence.
def rnn_forward_torch(x, a0, param):
    caches = []
    n_x, m, T_x = x.shape
    n_y, n_a = param["Wya"].shape
    a = torch.zeros((n_a, m, T_x))
    y_pred = torch.zeros((n_y, m, T_x))
    a_next = a0
    for t in range(T_x):
        a_next, yt_pred, cache = rnn_cell_forward_torch(x[:, :, t], a_next, param)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        caches.append(cache)
    caches = (caches, x)
    return a, y_pred, caches

np.random.seed(1)
x = torch.Tensor(np.random.randn(3, 10, 4))
a0 = torch.Tensor(np.random.randn(5, 10))
Waa = torch.Tensor(np.random.randn(5, 5))
Wax = torch.Tensor(np.random.randn(5, 3))
Wya = torch.Tensor(np.random.randn(2, 5))
ba = torch.Tensor(np.random.randn(5, 1))
by = torch.Tensor(np.random.randn(2, 1))
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward_torch(x, a0, parameters)
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("y_pred[1][3] =", y_pred[1][3])
print("y_pred.shape = ", y_pred.shape)
print("caches[1][1][3] =", caches[1][1][3])
print("len(caches) = ", len(caches))

# Backward pass for a single time step.
def rnn_cell_backward_torch(da_next, cache):
    (a_next, a_prev, xt, param) = cache
    Wax = param['Wax']
    Waa = param['Waa']
    dtanh = (1 - a_next * a_next) * da_next  # tanh'(z) = 1 - tanh(z)^2
    dxt = torch.matmul(Wax.T, dtanh)
    dWax = torch.matmul(dtanh, xt.T)
    da_prev = torch.matmul(Waa.T, dtanh)
    dWaa = torch.matmul(dtanh, a_prev.T)
    dba = torch.sum(dtanh, dim=-1, keepdim=True)  # torch spells it keepdim, not keepdims
    gradients = {"dxt": dxt, "dWax": dWax, "da_prev": da_prev,
                 "dWaa": dWaa, "dba": dba}
    return gradients

np.random.seed(1)
xt = torch.Tensor(np.random.randn(3, 10))
a_prev = torch.Tensor(np.random.randn(5, 10))
Wax = torch.Tensor(np.random.randn(5, 3))
Waa = torch.Tensor(np.random.randn(5, 5))
Wya = torch.Tensor(np.random.randn(2, 5))
ba = torch.Tensor(np.random.randn(5, 1))  # the original assigned this to `b`, reusing the stale `ba`
by = torch.Tensor(np.random.randn(2, 1))
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "ba": ba, "by": by}

a_next, yt, cache = rnn_cell_forward_torch(xt, a_prev, parameters)
da_next = torch.Tensor(np.random.randn(5, 10))
gradients = rnn_cell_backward_torch(da_next, cache)
print('gradients["dxt"][1][2] =', gradients["dxt"][1][2])
print('gradients["dxt"].shape =', gradients["dxt"].shape)
print('gradients["da_prev"][2][3] =', gradients["da_prev"][2][3])
print('gradients["da_prev"].shape =', gradients["da_prev"].shape)
print('gradients["dWax"][3][1] =', gradients["dWax"][3][1])
print('gradients["dWax"].shape =', gradients["dWax"].shape)
print('gradients["dWaa"][1][2] =', gradients["dWaa"][1][2])
print('gradients["dWaa"].shape =', gradients["dWaa"].shape)
print('gradients["dba"][4] =', gradients["dba"][4])
print('gradients["dba"].shape =', gradients["dba"].shape)

# Backward pass through the whole sequence.
def rnn_backward_torch(da, caches):
    caches, x = caches
    a1, a0, x1, param = caches[0]
    n_a, m, T_x = da.shape
    n_x, m = x1.shape
    dx = torch.zeros((n_x, m, T_x))
    dWax = torch.zeros((n_a, n_x))
    dWaa = torch.zeros((n_a, n_a))
    dba = torch.zeros((n_a, 1))
    da0 = torch.zeros((n_a, m))
    da_prevt = torch.zeros((n_a, m))
    for t in reversed(range(T_x)):
        # Gradient reaching step t: upstream da plus the gradient from step t+1.
        grads = rnn_cell_backward_torch(da[:, :, t] + da_prevt, caches[t])
        dxt, da_prevt = grads["dxt"], grads["da_prev"]
        dx[:, :, t] = dxt
        dWax += grads["dWax"]  # parameter gradients accumulate over time steps
        dWaa += grads["dWaa"]
        dba += grads["dba"]
    da0 = da_prevt
    gradients = {"dx": dx, "da0": da0, "dWax": dWax, "dWaa": dWaa, "dba": dba}
    return gradients

np.random.seed(1)
x = torch.Tensor(np.random.randn(3, 10, 4))
a0 = torch.Tensor(np.random.randn(5, 10))
Wax = torch.Tensor(np.random.randn(5, 3))
Waa = torch.Tensor(np.random.randn(5, 5))
Wya = torch.Tensor(np.random.randn(2, 5))
ba = torch.Tensor(np.random.randn(5, 1))
by = torch.Tensor(np.random.randn(2, 1))
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "ba": ba, "by": by}

a, y, caches = rnn_forward_torch(x, a0, parameters)
da = torch.Tensor(np.random.randn(5, 10, 4))
gradients = rnn_backward_torch(da, caches)
print('gradients["dx"][1][2] =', gradients["dx"][1][2])
print('gradients["dx"].shape =', gradients["dx"].shape)
print('gradients["da0"][2][3] =', gradients["da0"][2][3])
print('gradients["da0"].shape =', gradients["da0"].shape)
print('gradients["dWax"][3][1] =', gradients["dWax"][3][1])
print('gradients["dWax"].shape =', gradients["dWax"].shape)
print('gradients["dWaa"][1][2] =', gradients["dWaa"][1][2])
print('gradients["dWaa"].shape =', gradients["dWaa"].shape)
print('gradients["dba"][4] =', gradients["dba"][4])
print('gradients["dba"].shape =', gradients["dba"].shape)
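
Because everything above is built from torch tensors, autograd can replay the manual backward pass. A minimal cross-check (an addition; the _c names are illustrative): with the surrogate loss L = sum(a_next * da_next), whose gradient with respect to a_next is exactly da_next, the parameter gradients produced by loss.backward() should match rnn_cell_backward_torch.

# Cross-check sketch (an addition): autograd vs. the manual backward pass.
torch.manual_seed(0)
xt_c = torch.randn(3, 10)
a_prev_c = torch.randn(5, 10)
da_next_c = torch.randn(5, 10)
param_c = {"Wax": torch.randn(5, 3, requires_grad=True),
           "Waa": torch.randn(5, 5, requires_grad=True),
           "Wya": torch.randn(2, 5),
           "ba": torch.randn(5, 1, requires_grad=True),
           "by": torch.randn(2, 1)}

a_next_c, _, cache_c = rnn_cell_forward_torch(xt_c, a_prev_c, param_c)
manual = rnn_cell_backward_torch(da_next_c, cache_c)

torch.sum(a_next_c * da_next_c).backward()  # L with dL/da_next = da_next
print("dWax max diff:", (param_c["Wax"].grad - manual["dWax"]).abs().max().item())
print("dWaa max diff:", (param_c["Waa"].grad - manual["dWaa"]).abs().max().item())
print("dba max diff:", (param_c["ba"].grad - manual["dba"]).abs().max().item())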

References

https://blog.csdn.net/segegse/article/details/127708468
