{"id":335,"date":"2022-07-09T17:05:10","date_gmt":"2022-07-09T09:05:10","guid":{"rendered":"https:\/\/www.lazybirds.top\/?p=335"},"modified":"2022-07-09T17:05:10","modified_gmt":"2022-07-09T09:05:10","slug":"d2l%e8%87%aa%e5%ad%a6%e7%ac%94%e8%ae%b0-3-%e7%ba%bf%e6%80%a7%e7%a5%9e%e7%bb%8f%e7%bd%91%e7%bb%9c","status":"publish","type":"post","link":"https:\/\/www.lazybirds.top\/?p=335","title":{"rendered":"d2l\u81ea\u5b66\u7b14\u8bb0 &#8211; 3.\u7ebf\u6027\u795e\u7ecf\u7f51\u7edc"},"content":{"rendered":"\n<p>\u8be5\u7ae0\u4e3b\u8981\u5305\u62ec\u7ebf\u6027\u56de\u5f52\u4e0esoftmax\u56de\u5f52\u3002<\/p>\n\n\n\n<p><a href=\"https:\/\/zh.d2l.ai\/\">\u300a\u52a8\u624b\u5b66\u6df1\u5ea6\u5b66\u4e60\u300b \u2014 \u52a8\u624b\u5b66\u6df1\u5ea6\u5b66\u4e60 2.0.0-beta0 documentation (d2l.ai)<\/a><\/p>\n\n\n\n<!--more-->\n\n\n\n      <meta charset=\"utf-8\">\n      <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n      \n      <link rel=\"stylesheet\" href=\"https:\/\/www.lazybirds.top\/katex\/katex.min.css\">\n      \n      \n      \n      \n      \n      \n      \n      \n      \n      <style>\n      \/**\n * prism.js Github theme based on GitHub's theme.\n * @author Sam Clarke\n *\/\ncode[class*=\"language-\"],\npre[class*=\"language-\"] {\n  color: #333;\n  background: none;\n  font-family: Consolas, \"Liberation Mono\", Menlo, Courier, monospace;\n  text-align: left;\n  white-space: pre;\n  word-spacing: normal;\n  word-break: normal;\n  word-wrap: normal;\n  line-height: 1.4;\n\n  -moz-tab-size: 8;\n  -o-tab-size: 8;\n  tab-size: 8;\n\n  -webkit-hyphens: none;\n  -moz-hyphens: none;\n  -ms-hyphens: none;\n  hyphens: none;\n}\n\n\/* Code blocks *\/\npre[class*=\"language-\"] {\n  padding: .8em;\n  overflow: auto;\n  \/* border: 1px solid #ddd; *\/\n  border-radius: 3px;\n  \/* background: #fff; *\/\n  background: #f5f5f5;\n}\n\n\/* Inline code *\/\n:not(pre) > code[class*=\"language-\"] {\n  padding: .1em;\n  border-radius: .3em;\n  white-space: 
normal;\n  background: #f5f5f5;\n}\n\n.token.comment,\n.token.blockquote {\n  color: #969896;\n}\n\n.token.cdata {\n  color: #183691;\n}\n\n.token.doctype,\n.token.punctuation,\n.token.variable,\n.token.macro.property {\n  color: #333;\n}\n\n.token.operator,\n.token.important,\n.token.keyword,\n.token.rule,\n.token.builtin {\n  color: #a71d5d;\n}\n\n.token.string,\n.token.url,\n.token.regex,\n.token.attr-value {\n  color: #183691;\n}\n\n.token.property,\n.token.number,\n.token.boolean,\n.token.entity,\n.token.atrule,\n.token.constant,\n.token.symbol,\n.token.command,\n.token.code {\n  color: #0086b3;\n}\n\n.token.tag,\n.token.selector,\n.token.prolog {\n  color: #63a35c;\n}\n\n.token.function,\n.token.namespace,\n.token.pseudo-element,\n.token.class,\n.token.class-name,\n.token.pseudo-class,\n.token.id,\n.token.url-reference .token.variable,\n.token.attr-name {\n  color: #795da3;\n}\n\n.token.entity {\n  cursor: help;\n}\n\n.token.title,\n.token.title .token.punctuation {\n  font-weight: bold;\n  color: #1d3e81;\n}\n\n.token.list {\n  color: #ed6a43;\n}\n\n.token.inserted {\n  background-color: #eaffea;\n  color: #55a532;\n}\n\n.token.deleted {\n  background-color: #ffecec;\n  color: #bd2c00;\n}\n\n.token.bold {\n  font-weight: bold;\n}\n\n.token.italic {\n  font-style: italic;\n}\n\n\n\/* JSON *\/\n.language-json .token.property {\n  color: #183691;\n}\n\n.language-markup .token.tag .token.punctuation {\n  color: #333;\n}\n\n\/* CSS *\/\ncode.language-css,\n.language-css .token.function {\n  color: #0086b3;\n}\n\n\/* YAML *\/\n.language-yaml .token.atrule {\n  color: #63a35c;\n}\n\ncode.language-yaml {\n  color: #183691;\n}\n\n\/* Ruby *\/\n.language-ruby .token.function {\n  color: #333;\n}\n\n\/* Markdown *\/\n.language-markdown .token.url {\n  color: #795da3;\n}\n\n\/* Makefile *\/\n.language-makefile .token.symbol {\n  color: #795da3;\n}\n\n.language-makefile .token.variable {\n  color: #183691;\n}\n\n.language-makefile .token.builtin {\n  color: 
#0086b3;\n}\n\n\/* Bash *\/\n.language-bash .token.keyword {\n  color: #0086b3;\n}\n\n\/* highlight *\/\npre[data-line] {\n  position: relative;\n  padding: 1em 0 1em 3em;\n}\npre[data-line] .line-highlight-wrapper {\n  position: absolute;\n  top: 0;\n  left: 0;\n  background-color: transparent;\n  display: block;\n  width: 100%;\n}\n\npre[data-line] .line-highlight {\n  position: absolute;\n  left: 0;\n  right: 0;\n  padding: inherit 0;\n  margin-top: 1em;\n  background: hsla(24, 20%, 50%,.08);\n  background: linear-gradient(to right, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0));\n  pointer-events: none;\n  line-height: inherit;\n  white-space: pre;\n}\n\npre[data-line] .line-highlight:before, \npre[data-line] .line-highlight[data-end]:after {\n  content: attr(data-start);\n  position: absolute;\n  top: .4em;\n  left: .6em;\n  min-width: 1em;\n  padding: 0 .5em;\n  background-color: hsla(24, 20%, 50%,.4);\n  color: hsl(24, 20%, 95%);\n  font: bold 65%\/1.5 sans-serif;\n  text-align: center;\n  vertical-align: .3em;\n  border-radius: 999px;\n  text-shadow: none;\n  box-shadow: 0 1px white;\n}\n\npre[data-line] .line-highlight[data-end]:after {\n  content: attr(data-end);\n  top: auto;\n  bottom: .4em;\n}html body{font-family:\"Helvetica Neue\",Helvetica,\"Segoe UI\",Arial,freesans,sans-serif;font-size:16px;line-height:1.6;color:#333;background-color:#fff;overflow:initial;box-sizing:border-box;word-wrap:break-word}html body>:first-child{margin-top:0}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{line-height:1.2;margin-top:1em;margin-bottom:16px;color:#000}html body h1{font-size:2.25em;font-weight:300;padding-bottom:.3em}html body h2{font-size:1.75em;font-weight:400;padding-bottom:.3em}html body h3{font-size:1.5em;font-weight:500}html body h4{font-size:1.25em;font-weight:600}html body h5{font-size:1.1em;font-weight:600}html body h6{font-size:1em;font-weight:600}html body h1,html body h2,html body h3,html body h4,html body 
h5{font-weight:600}html body h5{font-size:1em}html body h6{color:#5c5c5c}html body strong{color:#000}html body del{color:#5c5c5c}html body a:not([href]){color:inherit;text-decoration:none}html body a{color:#08c;text-decoration:none}html body a:hover{color:#00a3f5;text-decoration:none}html body img{max-width:100%}html body>p{margin-top:0;margin-bottom:16px;word-wrap:break-word}html body>ul,html body>ol{margin-bottom:16px}html body ul,html body ol{padding-left:2em}html body ul.no-list,html body ol.no-list{padding:0;list-style-type:none}html body ul ul,html body ul ol,html body ol ol,html body ol ul{margin-top:0;margin-bottom:0}html body li{margin-bottom:0}html body li.task-list-item{list-style:none}html body li>p{margin-top:0;margin-bottom:0}html body .task-list-item-checkbox{margin:0 .2em .25em -1.8em;vertical-align:middle}html body .task-list-item-checkbox:hover{cursor:pointer}html body blockquote{margin:16px 0;font-size:inherit;padding:0 15px;color:#5c5c5c;background-color:#f0f0f0;border-left:4px solid #d6d6d6}html body blockquote>:first-child{margin-top:0}html body blockquote>:last-child{margin-bottom:0}html body hr{height:4px;margin:32px 0;background-color:#d6d6d6;border:0 none}html body table{margin:10px 0 15px 0;border-collapse:collapse;border-spacing:0;display:block;width:100%;overflow:auto;word-break:normal;word-break:keep-all}html body table th{font-weight:bold;color:#000}html body table td,html body table th{border:1px solid #d6d6d6;padding:6px 13px}html body dl{padding:0}html body dl dt{padding:0;margin-top:16px;font-size:1em;font-style:italic;font-weight:bold}html body dl dd{padding:0 16px;margin-bottom:16px}html body code{font-family:Menlo,Monaco,Consolas,'Courier New',monospace;font-size:.85em !important;color:#000;background-color:#f0f0f0;border-radius:3px;padding:.2em 0}html body code::before,html body code::after{letter-spacing:-0.2em;content:\"\\00a0\"}html body pre>code{padding:0;margin:0;font-size:.85em 
!important;word-break:normal;white-space:pre;background:transparent;border:0}html body .highlight{margin-bottom:16px}html body .highlight pre,html body pre{padding:1em;overflow:auto;font-size:.85em !important;line-height:1.45;border:#d6d6d6;border-radius:3px}html body .highlight pre{margin-bottom:0;word-break:normal}html body pre code,html body pre tt{display:inline;max-width:initial;padding:0;margin:0;overflow:initial;line-height:inherit;word-wrap:normal;background-color:transparent;border:0}html body pre code:before,html body pre tt:before,html body pre code:after,html body pre tt:after{content:normal}html body p,html body blockquote,html body ul,html body ol,html body dl,html body pre{margin-top:0;margin-bottom:16px}html body kbd{color:#000;border:1px solid #d6d6d6;border-bottom:2px solid #c7c7c7;padding:2px 4px;background-color:#f0f0f0;border-radius:3px}@media print{html body{background-color:#fff}html body h1,html body h2,html body h3,html body h4,html body h5,html body h6{color:#000;page-break-after:avoid}html body blockquote{color:#5c5c5c}html body pre{page-break-inside:avoid}html body table{display:table}html body img{display:block;max-width:100%;max-height:100%}html body pre,html body code{word-wrap:break-word;white-space:pre}}.markdown-preview{width:100%;height:100%;box-sizing:border-box}.markdown-preview .pagebreak,.markdown-preview .newpage{page-break-before:always}.markdown-preview pre.line-numbers{position:relative;padding-left:3.8em;counter-reset:linenumber}.markdown-preview pre.line-numbers>code{position:relative}.markdown-preview pre.line-numbers .line-numbers-rows{position:absolute;pointer-events:none;top:1em;font-size:100%;left:0;width:3em;letter-spacing:-1px;border-right:1px solid #999;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.markdown-preview pre.line-numbers .line-numbers-rows>span{pointer-events:none;display:block;counter-increment:linenumber}.markdown-preview pre.line-numbers 
.line-numbers-rows>span:before{content:counter(linenumber);color:#999;display:block;padding-right:.8em;text-align:right}.markdown-preview .mathjax-exps .MathJax_Display{text-align:center !important}.markdown-preview:not([for=\"preview\"]) .code-chunk .btn-group{display:none}.markdown-preview:not([for=\"preview\"]) .code-chunk .status{display:none}.markdown-preview:not([for=\"preview\"]) .code-chunk .output-div{margin-bottom:16px}.scrollbar-style::-webkit-scrollbar{width:8px}.scrollbar-style::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}.scrollbar-style::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,0.66);border:4px solid rgba(150,150,150,0.66);background-clip:content-box}html body[for=\"html-export\"]:not([data-presentation-mode]){position:relative;width:100%;height:100%;top:0;left:0;margin:0;padding:0;overflow:auto}html body[for=\"html-export\"]:not([data-presentation-mode]) .markdown-preview{position:relative;top:0}@media screen and (min-width:914px){html body[for=\"html-export\"]:not([data-presentation-mode]) .markdown-preview{padding:2em calc(50% - 457px + 2em)}}@media screen and (max-width:914px){html body[for=\"html-export\"]:not([data-presentation-mode]) .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=\"html-export\"]:not([data-presentation-mode]) .markdown-preview{font-size:14px !important;padding:1em}}@media print{html body[for=\"html-export\"]:not([data-presentation-mode]) #sidebar-toc-btn{display:none}}html body[for=\"html-export\"]:not([data-presentation-mode]) #sidebar-toc-btn{position:fixed;bottom:8px;left:8px;font-size:28px;cursor:pointer;color:inherit;z-index:99;width:32px;text-align:center;opacity:.4}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] #sidebar-toc-btn{opacity:1}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] 
.md-sidebar-toc{position:fixed;top:0;left:0;width:300px;height:100%;padding:32px 0 48px 0;font-size:14px;box-shadow:0 0 4px rgba(150,150,150,0.33);box-sizing:border-box;overflow:auto;background-color:inherit}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar{width:8px}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-track{border-radius:10px;background-color:transparent}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc::-webkit-scrollbar-thumb{border-radius:5px;background-color:rgba(150,150,150,0.66);border:4px solid rgba(150,150,150,0.66);background-clip:content-box}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc a{text-decoration:none}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc ul{padding:0 1.6em;margin-top:.8em}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc li{margin-bottom:.8em}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .md-sidebar-toc ul{list-style-type:none}html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{left:300px;width:calc(100% -  300px);padding:2em calc(50% - 457px -  150px);margin:0;box-sizing:border-box}@media screen and (max-width:1274px){html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{padding:2em}}@media screen and (max-width:450px){html body[for=\"html-export\"]:not([data-presentation-mode])[html-show-sidebar-toc] .markdown-preview{width:100%}}html body[for=\"html-export\"]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .markdown-preview{left:50%;transform:translateX(-50%)}html 
body[for=\"html-export\"]:not([data-presentation-mode]):not([html-show-sidebar-toc]) .md-sidebar-toc{display:none}\n\/* Please visit the URL below for more information: *\/\n\/*   https:\/\/shd101wyy.github.io\/markdown-preview-enhanced\/#\/customize-css *\/\n\n      <\/style>\n\n\n      <div class=\"mume markdown-preview  \">\n      \n\n<p>&#x4E24;&#x7C7B;&#x9884;&#x6D4B;&#x95EE;&#x9898;&#xFF1A;<\/p>\n<ul>\n<li>&#x56DE;&#x5F52;<\/li>\n<li>&#x5206;&#x7C7B;<\/li>\n<\/ul>\n<h1 class=\"mume-header\" id=\"%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92\">&#x7EBF;&#x6027;&#x56DE;&#x5F52;<\/h1>\n\n<h2 class=\"mume-header\" id=\"%E4%B8%80%E4%BA%9B%E6%A6%82%E5%BF%B5\">&#x4E00;&#x4E9B;&#x6982;&#x5FF5;<\/h2>\n\n<ul>\n<li>\n<p>&#x4EFF;&#x5C04;&#x53D8;&#x6362;&#xFF08;affine transformation&#xFF09;<\/p>\n<\/li>\n<li>\n<p>&#x89E3;&#x6790;&#x89E3;&#xFF1A;&#x7EBF;&#x6027;&#x56DE;&#x5F52;&#x5B58;&#x5728;&#x89E3;&#x6790;&#x89E3;&#xFF0C;&#x4F46;&#x5E76;&#x4E0D;&#x662F;&#x6240;&#x6709;&#x95EE;&#x9898;&#x90FD;&#x5B58;&#x5728;&#x89E3;&#x6790;&#x89E3;<\/p>\n<\/li>\n<li>\n<p>&#x6279;&#x91CF;&#x5927;&#x5C0F;&#xFF08;batch size&#xFF09;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi mathvariant=\"normal\">&#x2223;<\/mi><mi mathvariant=\"script\">B<\/mi><mi mathvariant=\"normal\">&#x2223;<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">|\\mathcal{B}|<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord\">&#x2223;<\/span><span class=\"mord mathcal\" style=\"margin-right:0.03041em;\">B<\/span><span class=\"mord\">&#x2223;<\/span><\/span><\/span><\/span>&#xFF1A;&#x6BCF;&#x4E2A;&#x5C0F;&#x6279;&#x91CF;&#x4E2D;&#x7684;&#x6837;&#x672C;&#x6570;<\/p>\n<\/li>\n<li>\n<p>&#x5B66;&#x4E60;&#x7387;&#xFF08;learning rate&#xFF09;<span class=\"katex\"><span class=\"katex-mathml\"><math 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi mathvariant=\"normal\">&#x2223;<\/mi><mi>&#x3B7;<\/mi><mi mathvariant=\"normal\">&#x2223;<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">|\\eta|<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord\">&#x2223;<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3B7;<\/span><span class=\"mord\">&#x2223;<\/span><\/span><\/span><\/span><\/p>\n<\/li>\n<li>\n<p>&#x9884;&#x6D4B;&#xFF08;prediction&#xFF09;&#xFF1A;&#x7ED9;&#x5B9A;&#x7279;&#x5F81;&#x4F30;&#x8BA1;&#x76EE;&#x6807;<\/p>\n<\/li>\n<li>\n<p>&#x63A8;&#x65AD;&#xFF08;inference&#xFF09;&#xFF1A;&#x57FA;&#x4E8E;&#x6570;&#x636E;&#x96C6;&#x4F30;&#x8BA1;&#x53C2;&#x6570;<\/p>\n<\/li>\n<\/ul>\n<h2 class=\"mume-header\" id=\"%E6%AD%A3%E6%80%81%E5%88%86%E5%B8%83%E4%B8%8E%E5%B9%B3%E6%96%B9%E6%8D%9F%E5%A4%B1\">&#x6B63;&#x6001;&#x5206;&#x5E03;&#x4E0E;&#x5E73;&#x65B9;&#x635F;&#x5931;<\/h2>\n\n<p>&#x5047;&#x8BBE;&#x89C2;&#x6D4B;&#x4E2D;&#x5305;&#x542B;&#x566A;&#x58F0;&#xFF0C;&#x566A;&#x58F0;&#x670D;&#x4ECE;&#x6B63;&#x6001;&#x5206;&#x5E03;&#x3002;<\/p>\n<p><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>y<\/mi><mo>=<\/mo><msup><mi mathvariant=\"bold\">w<\/mi><mi mathvariant=\"normal\">&#x22A4;<\/mi><\/msup><mi mathvariant=\"bold\">x<\/mi><mo>+<\/mo><mi>b<\/mi><mo>+<\/mo><mi>&#x3F5;<\/mi><mspace linebreak=\"newline\"><\/mspace><mi>&#x3F5;<\/mi><mo>&#x223C;<\/mo><mi mathvariant=\"script\">N<\/mi><mo stretchy=\"false\">(<\/mo><mn>0<\/mn><mo separator=\"true\">,<\/mo><msup><mi>&#x3C3;<\/mi><mn>2<\/mn><\/msup><mo stretchy=\"false\">)<\/mo><\/mrow><annotation 
encoding=\"application\/x-tex\">y=\\mathbf{w}^\\top\\mathbf{x}+b+\\epsilon\\\\\n\\epsilon\\sim\\mathcal{N}(0,\\sigma^2)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.625em;vertical-align:-0.1944em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.9824em;vertical-align:-0.0833em;\"><\/span><span class=\"mord\"><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">w<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8991em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">&#x22A4;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathbf\">x<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.7778em;vertical-align:-0.0833em;\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.4306em;\"><\/span><span class=\"mord mathnormal\">&#x3F5;<\/span><\/span><span class=\"mspace newline\"><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.4306em;\"><\/span><span class=\"mord mathnormal\">&#x3F5;<\/span><span class=\"mspace\" 
style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x223C;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.1141em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathcal\" style=\"margin-right:0.14736em;\">N<\/span><span class=\"mopen\">(<\/span><span class=\"mord\">0<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C3;<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8641em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/p>\n<p><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>P<\/mi><mo stretchy=\"false\">(<\/mo><mi>y<\/mi><mo>&#x2223;<\/mo><mi mathvariant=\"bold\">x<\/mi><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><mfrac><mn>1<\/mn><msqrt><mrow><mn>2<\/mn><mi>&#x3C0;<\/mi><msup><mi>&#x3C3;<\/mi><mn>2<\/mn><\/msup><\/mrow><\/msqrt><\/mfrac><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><mo>&#x2212;<\/mo><mfrac><mn>1<\/mn><mrow><mn>2<\/mn><msup><mi>&#x3C3;<\/mi><mn>2<\/mn><\/msup><\/mrow><\/mfrac><msup><mrow><mo fence=\"true\">(<\/mo><mi>y<\/mi><mo>&#x2212;<\/mo><msup><mi mathvariant=\"bold\">w<\/mi><mi mathvariant=\"normal\">&#x22A4;<\/mi><\/msup><mi mathvariant=\"bold\">x<\/mi><mo>&#x2212;<\/mo><mi>b<\/mi><mo fence=\"true\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><mo fence=\"true\">)<\/mo><\/mrow><mspace linebreak=\"newline\"><\/mspace><mi>P<\/mi><mo 
stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">y<\/mi><mo>&#x2223;<\/mo><mi mathvariant=\"bold\">X<\/mi><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><munderover><mo>&#x220F;<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>n<\/mi><\/munderover><mi>p<\/mi><mrow><mo fence=\"true\">(<\/mo><msup><mi>y<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo>&#x2223;<\/mo><msup><mi mathvariant=\"bold\">x<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo fence=\"true\">)<\/mo><\/mrow><mspace linebreak=\"newline\"><\/mspace><mo>&#x2212;<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mi>P<\/mi><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">y<\/mi><mo>&#x2223;<\/mo><mi mathvariant=\"bold\">X<\/mi><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>n<\/mi><\/munderover><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><mi>log<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><mn>2<\/mn><mi>&#x3C0;<\/mi><msup><mi>&#x3C3;<\/mi><mn>2<\/mn><\/msup><mo fence=\"true\">)<\/mo><\/mrow><mo>+<\/mo><mfrac><mn>1<\/mn><mrow><mn>2<\/mn><msup><mi>&#x3C3;<\/mi><mn>2<\/mn><\/msup><\/mrow><\/mfrac><msup><mrow><mo fence=\"true\">(<\/mo><msup><mi>y<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo>&#x2212;<\/mo><msup><mi mathvariant=\"bold\">w<\/mi><mi mathvariant=\"normal\">&#x22A4;<\/mi><\/msup><msup><mi mathvariant=\"bold\">x<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo>&#x2212;<\/mo><mi>b<\/mi><mo fence=\"true\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">P\n(y \\mid \\mathbf{x})=\\frac{1}{\\sqrt{2 \\pi \\sigma^{2}}} \\exp \\left(-\\frac{1}{2 \\sigma^{2}}\\left(y-\\mathbf{w}^{\\top} \\mathbf{x}-b\\right)^{2}\\right)\n\\\\\nP(\\mathbf{y} \\mid \\mathbf{X})=\\prod_{i=1}^{n} 
p\\left(y^{(i)} \\mid \\mathbf{x}^{(i)}\\right)\n\\\\\n-\\log P(\\mathbf{y} \\mid \\mathbf{X})=\\sum_{i=1}^{n} \\frac{1}{2} \\log \\left(2 \\pi \\sigma^{2}\\right)+\\frac{1}{2 \\sigma^{2}}\\left(y^{(i)}-\\mathbf{w}^{\\top} \\mathbf{x}^{(i)}-b\\right)^{2}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathbf\">x<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.4em;vertical-align:-0.95em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.3214em;\"><span style=\"top:-2.1549em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord sqrt\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9551em;\"><span class=\"svg-align\" style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\" style=\"padding-left:0.833em;\"><span class=\"mord\">2<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C0;<\/span><span class=\"mord\"><span class=\"mord mathnormal\" 
style=\"margin-right:0.03588em;\">&#x3C3;<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7401em;\"><span style=\"top:-2.989em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-2.9151em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"hide-tail\" style=\"min-width:0.853em;height:1.08em;\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"400em\" height=\"1.08em\" viewBox=\"0 0 400000 1080\" preserveAspectRatio=\"xMinYMin slice\"><path d=\"M95,702\nc-2.7,0,-7.17,-2.7,-13.5,-8c-5.8,-5.3,-9.5,-10,-9.5,-14\nc0,-2,0.3,-3.3,1,-4c1.3,-2.7,23.83,-20.7,67.5,-54\nc44.2,-33.3,65.8,-50.3,66.5,-51c1.3,-1.3,3,-2,5,-2c4.7,0,8.7,3.3,12,10\ns173,378,173,378c0.7,0,35.3,-71,104,-213c68.7,-142,137.5,-285,206.5,-429\nc69,-144,104.5,-217.7,106.5,-221\nl0 -0\nc5.3,-9.3,12,-14,20,-14\nH400000v40H845.2724\ns-225.272,467,-225.272,467s-235,486,-235,486c-2.7,4.7,-9,7,-19,7\nc-6,0,-10,-1,-12,-3s-194,-422,-194,-422s-65,47,-65,47z\nM834 80h400000v40h-400000z\"\/><\/svg><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.0849em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.93em;\"><span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size3\">(<\/span><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.3214em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">2<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C3;<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7401em;\"><span style=\"top:-2.989em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.686em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size1\">(<\/span><\/span><span class=\"mord 
mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mord\"><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">w<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8991em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">&#x22A4;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathbf\">x<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size1\">)<\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.1031em;\"><span style=\"top:-3.352em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size3\">)<\/span><\/span><\/span><\/span><span class=\"mspace newline\"><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span 
class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathbf\">X<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.9291em;vertical-align:-1.2777em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6514em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x220F;<\/span><\/span><\/span><span style=\"top:-4.3em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.2777em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\">p<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span 
class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\"><span class=\"mord mathbf\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><\/span><span class=\"mspace newline\"><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" 
style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathbf\">X<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.9291em;vertical-align:-1.2777em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6514em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.2777em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.3214em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">2<\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" 
style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.686em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size1\">(<\/span><\/span><span class=\"mord\">2<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C0;<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C3;<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8641em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size1\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.04em;vertical-align:-0.686em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:1.3214em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">2<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">&#x3C3;<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7401em;\"><span style=\"top:-2.989em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mord\">1<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.686em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose 
mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mord\"><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">w<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8991em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">&#x22A4;<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathbf\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mord mathnormal\">b<\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.354em;\"><span style=\"top:-3.6029em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord 
mtight\">2<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><br>\n&#x5728;&#x9AD8;&#x65AF;&#x566A;&#x58F0;&#x7684;&#x5047;&#x8BBE;&#x4E0B;&#xFF0C;&#x6700;&#x5C0F;&#x5316;&#x5747;&#x65B9;&#x8BEF;&#x5DEE;&#x7B49;&#x4EF7;&#x4E8E;&#x5BF9;&#x7EBF;&#x6027;&#x6A21;&#x578B;&#x7684;&#x6781;&#x5927;&#x4F3C;&#x7136;&#x4F30;&#x8BA1;&#x3002;<\/p>\n<p><strong>&#x4E0B;&#x9762;&#x662F;&#x5177;&#x4F53;&#x5B9E;&#x73B0;&#xFF1A;<\/strong><\/p>\n<h2 class=\"mume-header\" id=\"%E7%94%9F%E6%88%90%E6%95%B0%E6%8D%AE%E9%9B%86\">&#x751F;&#x6210;&#x6570;&#x636E;&#x96C6;<\/h2>\n\n<p>&#x5047;&#x8BBE;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>&#x3F5;<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">\\epsilon<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.4306em;\"><\/span><span class=\"mord mathnormal\">&#x3F5;<\/span><\/span><\/span><\/span>&#x670D;&#x4ECE;&#x6B63;&#x6001;&#x5206;&#x5E03;<\/p>\n<h2 class=\"mume-header\" id=\"%E8%AF%BB%E5%8F%96%E6%95%B0%E6%8D%AE%E9%9B%86\">&#x8BFB;&#x53D6;&#x6570;&#x636E;&#x96C6;<\/h2>\n\n<p>&#x5B9A;&#x4E49;&#x4E00;&#x4E2A;data_iter&#x51FD;&#x6570;&#xFF0C;&#x63A5;&#x53D7;&#x6279;&#x91CF;&#x5927;&#x5C0F;&#x3001;&#x7279;&#x5F81;&#x77E9;&#x9635;&#x3001;&#x6807;&#x7B7E;&#x5411;&#x91CF;&#x4F5C;&#x4E3A;&#x8F93;&#x5165;&#xFF0C;&#x751F;&#x6210;&#x5927;&#x5C0F;&#x4E3A;batch_size&#x7684;&#x5C0F;&#x6279;&#x91CF;&#x3002;<\/p>\n<p>&#x6837;&#x672C;&#x968F;&#x673A;&#x8BFB;&#x53D6;&#xFF0C;&#x7528;&#x5230;&#x7684;&#x51FD;&#x6570;&#x6709;&#xFF1A;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\">indices <span class=\"token operator\">=<\/span> <span class=\"token builtin\">list<\/span><span class=\"token punctuation\">(<\/span><span class=\"token builtin\">range<\/span><span class=\"token 
punctuation\">(<\/span>num_examples<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span>\nrandom<span class=\"token punctuation\">.<\/span>shuffle<span class=\"token punctuation\">(<\/span>indices<span class=\"token punctuation\">)<\/span>\n<\/pre><p>&#x4E3A;&#x4EC0;&#x4E48;&#x8981;&#x5206;&#x6279;&#xFF1F;&#x5229;&#x7528;GPU&#x5E76;&#x884C;&#x8FD0;&#x7B97;&#x7684;&#x4F18;&#x52BF;&#xFF0C;&#x5904;&#x7406;&#x5408;&#x7406;&#x5927;&#x5C0F;&#x7684;&#x201C;&#x5C0F;&#x6279;&#x91CF;&#x201D;&#x3002;&#x6BCF;&#x4E2A;&#x6837;&#x672C;&#x90FD;&#x53EF;&#x4EE5;&#x5E76;&#x884C;&#x5730;&#x8FDB;&#x884C;&#x6A21;&#x578B;&#x8BA1;&#x7B97;&#x3002;<\/p>\n<p>&#x81EA;&#x5B9A;&#x4E49;&#x8FED;&#x4EE3;&#x5668;&#x8981;&#x6C42;&#x5C06;&#x6240;&#x6709;&#x6570;&#x636E;&#x52A0;&#x8F7D;&#x5230;&#x5185;&#x5B58;&#x4E2D;&#xFF0C;&#x5E76;&#x6267;&#x884C;&#x5927;&#x91CF;&#x968F;&#x673A;&#x5185;&#x5B58;&#x8BBF;&#x95EE;&#xFF1B;&#x5185;&#x7F6E;&#x8FED;&#x4EE3;&#x5668;&#x6548;&#x7387;&#x8981;&#x9AD8;&#x5F88;&#x591A;&#x3002;<\/p>\n<p>&#x82E5;&#x4F7F;&#x7528;&#x6846;&#x67B6;&#x4E2D;&#x73B0;&#x6709;API&#x6765;&#x8BFB;&#x53D6;&#x6570;&#x636E;&#xFF0C;&#x5219;&#x53EF;&#x4EE5;&#x901A;&#x8FC7;<code>data.DataLoader<\/code>&#x6765;&#x5B9E;&#x73B0;&#x3002;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"py\" class=\"language-python\"><span class=\"token keyword keyword-from\">from<\/span> torch<span class=\"token punctuation\">.<\/span>utils <span class=\"token keyword keyword-import\">import<\/span> data\ndataset <span class=\"token operator\">=<\/span> data<span class=\"token punctuation\">.<\/span>TensorDataset<span class=\"token punctuation\">(<\/span>X<span class=\"token punctuation\">,<\/span> y<span class=\"token punctuation\">)<\/span> <span class=\"token comment\"># X, y&#x7684;&#x7B2C;&#x4E00;&#x7EF4;&#x5EA6;&#x5F53;&#x76F8;&#x7B49;<\/span>\ndata_iter <span class=\"token operator\">=<\/span> data<span class=\"token punctuation\">.<\/span>DataLoader<span class=\"token 
punctuation\">(<\/span>dataset<span class=\"token operator\">=<\/span>dataset<span class=\"token punctuation\">,<\/span> batch_size<span class=\"token operator\">=<\/span>batch_size<span class=\"token punctuation\">,<\/span> shuffle<span class=\"token operator\">=<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><h2 class=\"mume-header\" id=\"%E5%AE%9A%E4%B9%89%E6%A8%A1%E5%9E%8B\">&#x5B9A;&#x4E49;&#x6A21;&#x578B;<\/h2>\n\n<p>&#x7EBF;&#x6027;&#x56DE;&#x5F52;&#x6A21;&#x578B;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\"><span class=\"token keyword keyword-from\">from<\/span> torch <span class=\"token keyword keyword-import\">import<\/span> nn\n<span class=\"token comment\"># &#x7B2C;&#x4E00;&#x4E2A;&#x53C2;&#x6570;&#x6307;&#x5B9A;&#x8F93;&#x5165;&#x7279;&#x5F81;&#x5F62;&#x72B6;&#xFF0C;&#x7B2C;&#x4E8C;&#x4E2A;&#x53C2;&#x6570;&#x6307;&#x5B9A;&#x8F93;&#x51FA;&#x7279;&#x5F81;&#x5F62;&#x72B6;<\/span>\nnet <span class=\"token operator\">=<\/span> nn<span class=\"token punctuation\">.<\/span>Sequential<span class=\"token punctuation\">(<\/span>nn<span class=\"token punctuation\">.<\/span>Linear<span class=\"token punctuation\">(<\/span><span class=\"token number\">2<\/span><span class=\"token punctuation\">,<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">;<\/span>\n<\/pre><h2 class=\"mume-header\" 
id=\"%E5%88%9D%E5%A7%8B%E5%8C%96%E6%A8%A1%E5%9E%8B%E5%8F%82%E6%95%B0\">&#x521D;&#x59CB;&#x5316;&#x6A21;&#x578B;&#x53C2;&#x6570;<\/h2>\n\n<p>&#x5982;&#x679C;&#x4F7F;&#x7528;&#x81EA;&#x5B9A;&#x4E49;&#x7684;&#x6A21;&#x578B;&#xFF0C;&#x9700;&#x8981;<code>requires_grad=True<\/code><\/p>\n<p>&#x5982;&#x679C;&#x4F7F;&#x7528;&#x9884;&#x5B9A;&#x4E49;&#x7684;&#x6846;&#x67B6;&#xFF0C;&#x53EF;&#x4EE5;&#x76F4;&#x63A5;&#x8BBF;&#x95EE;&#x53C2;&#x6570;&#x4EE5;&#x8BBE;&#x5B9A;&#x521D;&#x59CB;&#x503C;&#x3002;<\/p>\n<ol>\n<li>&#x901A;&#x8FC7;<code>net[0]<\/code>&#x9009;&#x62E9;&#x7F51;&#x7EDC;&#x4E2D;&#x7684;&#x7B2C;&#x4E00;&#x4E2A;&#x56FE;&#x5C42;<\/li>\n<li>&#x4F7F;&#x7528;<code>weight.data<\/code>&#x548C;<code>bias.data<\/code>&#x65B9;&#x6CD5;&#x8BBF;&#x95EE;&#x53C2;&#x6570;<\/li>\n<li>&#x4F7F;&#x7528;&#x66FF;&#x6362;&#x65B9;&#x6CD5;<code>normal_<\/code>&#x548C;<code>fill_<\/code>&#x6765;&#x91CD;&#x5199;&#x53C2;&#x6570;&#x503C;<\/li>\n<\/ol>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\">net<span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">.<\/span>weight<span class=\"token punctuation\">.<\/span>data<span class=\"token punctuation\">.<\/span>normal_<span class=\"token punctuation\">(<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">,<\/span> <span class=\"token number\">0.01<\/span><span class=\"token punctuation\">)<\/span>\nnet<span class=\"token punctuation\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">.<\/span>bias<span class=\"token punctuation\">.<\/span>data<span class=\"token punctuation\">.<\/span>fill_<span class=\"token punctuation\">(<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><h2 class=\"mume-header\" 
id=\"%E5%AE%9A%E4%B9%89%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0\">&#x5B9A;&#x4E49;&#x635F;&#x5931;&#x51FD;&#x6570;<\/h2>\n\n<p>&#x5E73;&#x65B9;&#x635F;&#x5931;&#xFF0C;MSELoss&#x7C7B;&#xFF08;Mean Squared Error&#xFF09;&#xFF0C;squared <span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>L<\/mi><mn>2<\/mn><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">L_2<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.8333em;vertical-align:-0.15em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">L<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3011em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\">2<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span> norm<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\">loss <span class=\"token operator\">=<\/span> nn<span class=\"token punctuation\">.<\/span>MSELoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><h2 class=\"mume-header\" id=\"%E5%AE%9A%E4%B9%89%E4%BC%98%E5%8C%96%E7%AE%97%E6%B3%95\">&#x5B9A;&#x4E49;&#x4F18;&#x5316;&#x7B97;&#x6CD5;<\/h2>\n\n<p>&#x5C0F;&#x6279;&#x91CF;&#x968F;&#x673A;&#x68AF;&#x5EA6;&#x4E0B;&#x964D;<\/p>\n<p>&#x4E00;&#x4E9B;&#x6280;&#x5DE7;&#xFF1A;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"py\" class=\"language-python\"><span class=\"token keyword keyword-with\">with<\/span> torch<span class=\"token punctuation\">.<\/span>no_grad<span 
class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\n<\/pre><blockquote>\n<p>Disabling gradient calculation is useful for inference, when you are sure that you will not call <code>Tensor.backward()<\/code>. It will reduce memory consumption for computations that would otherwise have <code>requires_grad=True<\/code>.<\/p>\n<\/blockquote>\n<p>&#x82E5;&#x4F7F;&#x7528;&#x6846;&#x67B6;&#x9884;&#x5B9A;&#x4E49;&#x7684;SGD&#xFF08;Stochastic gradient descent&#xFF09;&#x7B97;&#x6CD5;&#xFF1A;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\"><span class=\"token comment\"># net.parameters()&#x8FD4;&#x56DE;&#x4E00;&#x4E2A;&#x8FED;&#x4EE3;&#x5668;&#xFF0C;&#x5305;&#x542B;net&#x6A21;&#x578B;&#x7684;&#x6240;&#x6709;&#x53C2;&#x6570;<\/span>\ntrainer <span class=\"token operator\">=<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>SGD<span class=\"token punctuation\">(<\/span>net<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">=<\/span><span class=\"token number\">0.03<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><h2 class=\"mume-header\" 
id=\"%E8%AE%AD%E7%BB%83\">&#x8BAD;&#x7EC3;<\/h2>\n\n<p>&#x5728;&#x6BCF;&#x6B21;&#x8FED;&#x4EE3;&#x4E2D;&#xFF0C;&#x8BFB;&#x53D6;&#x4E00;&#x5C0F;&#x6279;&#x91CF;&#x8BAD;&#x7EC3;&#x6837;&#x672C;&#xFF0C;&#x5E76;&#x901A;&#x8FC7;&#x6A21;&#x578B;&#x6765;&#x83B7;&#x5F97;&#x4E00;&#x7EC4;&#x9884;&#x6D4B;&#x3002;&#x8BA1;&#x7B97;&#x5B8C;&#x635F;&#x5931;&#x540E;&#xFF0C;&#x5F00;&#x59CB;&#x53CD;&#x5411;&#x4F20;&#x64AD;&#xFF0C;&#x5B58;&#x50A8;&#x6BCF;&#x4E2A;&#x53C2;&#x6570;&#x7684;&#x68AF;&#x5EA6;&#x3002;&#x6700;&#x540E;&#xFF0C;&#x8C03;&#x7528;&#x4F18;&#x5316;&#x7B97;&#x6CD5;sgd&#x6765;&#x66F4;&#x65B0;&#x6A21;&#x578B;&#x53C2;&#x6570;&#x3002;<\/p>\n<p>&#x8FED;&#x4EE3;&#x5468;&#x671F;&#x4E2A;&#x6570;num_epochs&#x548C;&#x5B66;&#x4E60;&#x7387;learning_rate&#x90FD;&#x662F;&#x8D85;&#x53C2;&#x6570;&#x3002;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\"><span class=\"token keyword keyword-for\">for<\/span> epoch <span class=\"token keyword keyword-in\">in<\/span> <span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span>num_epochs<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\n    <span class=\"token keyword keyword-for\">for<\/span> X<span class=\"token punctuation\">,<\/span> y <span class=\"token keyword keyword-in\">in<\/span> data_iter<span class=\"token punctuation\">:<\/span>\n        l <span class=\"token operator\">=<\/span> loss<span class=\"token punctuation\">(<\/span>net<span class=\"token punctuation\">(<\/span>X<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> y<span class=\"token punctuation\">)<\/span>\n        trainer<span class=\"token punctuation\">.<\/span>zero_grad<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\n        l<span class=\"token punctuation\">.<\/span>backward<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\n        trainer<span 
class=\"token punctuation\">.<\/span>step<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\n    l <span class=\"token operator\">=<\/span> loss<span class=\"token punctuation\">(<\/span>net<span class=\"token punctuation\">(<\/span>features<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> labels<span class=\"token punctuation\">)<\/span>\n    <span class=\"token keyword keyword-print\">print<\/span><span class=\"token punctuation\">(<\/span><span class=\"token string-interpolation\"><span class=\"token string\">f&apos;epoch <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>epoch <span class=\"token operator\">+<\/span> <span class=\"token number\">1<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">, loss <\/span><span class=\"token interpolation\"><span class=\"token punctuation\">{<\/span>l<span class=\"token punctuation\">:<\/span><span class=\"token format-spec\">f<\/span><span class=\"token punctuation\">}<\/span><\/span><span class=\"token string\">&apos;<\/span><\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><h1 class=\"mume-header\" id=\"softmax%E5%9B%9E%E5%BD%92\">softmax&#x56DE;&#x5F52;<\/h1>\n\n<h2 class=\"mume-header\" id=\"%E5%88%86%E7%B1%BB%E9%97%AE%E9%A2%98\">&#x5206;&#x7C7B;&#x95EE;&#x9898;<\/h2>\n\n<p>&#x5373;&#x4F7F;&#x6211;&#x4EEC;&#x53EA;&#x5173;&#x5FC3;&#x786C;&#x7C7B;&#x522B;&#xFF0C;&#x6211;&#x4EEC;&#x4ECD;&#x7136;&#x4F7F;&#x7528;&#x8F6F;&#x7C7B;&#x522B;&#x7684;&#x6A21;&#x578B;&#x2014;&#x2014;&#x7528;&#x56DE;&#x5F52;&#x89E3;&#x51B3;&#x5206;&#x7C7B;&#x95EE;&#x9898;&#x3002;<\/p>\n<p>&#x8868;&#x793A;&#x5206;&#x7C7B;&#x6570;&#x636E;&#x7684;&#x65B9;&#x6CD5;&#xFF1A;&#x72EC;&#x70ED;&#x7F16;&#x7801;&#xFF08;&#x5411;&#x91CF;&#x8868;&#x793A;&#xFF09;<\/p>\n<h2 class=\"mume-header\" 
id=\"softmax%E8%BF%90%E7%AE%97\">softmax&#x8FD0;&#x7B97;<\/h2>\n\n<p>&#x8F93;&#x51FA;&#x89C4;&#x8303;&#x5316;&#xFF1A;<\/p>\n<ol>\n<li>&#x975E;&#x8D1F;<\/li>\n<li>&#x603B;&#x548C;&#x4E3A;1<\/li>\n<li>&#x6821;&#x51C6;&#xFF08;calibration&#xFF09;&#xFF1A;&#x5728;&#x5206;&#x7C7B;&#x5668;&#x8F93;&#x51FA;0.5&#x7684;&#x6240;&#x6709;&#x6837;&#x672C;&#x4E2D;&#xFF0C;&#x6709;&#x4E00;&#x534A;&#x5B9E;&#x9645;&#x5C5E;&#x4E8E;&#x9884;&#x6D4B;&#x7684;&#x7C7B;&#x3002;<\/li>\n<\/ol>\n<p><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><mo>=<\/mo><mi mathvariant=\"normal\">softmax<\/mi><mo>&#x2061;<\/mo><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">o<\/mi><mo stretchy=\"false\">)<\/mo><mspace width=\"1em\"><\/mspace><mtext>&#xA0;&#x5176;&#x4E2D;&#xA0;<\/mtext><mspace width=\"1em\"><\/mspace><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><mo>=<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munder><mo>&#x2211;<\/mo><mi>k<\/mi><\/munder><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\\hat{\\mathbf{y}}=\\operatorname{softmax}(\\mathbf{o}) \\quad \\text { &#x5176;&#x4E2D; } \\quad \\hat{y}_{j}=\\frac{\\exp \\left(o_{j}\\right)}{\\sum_{k} \\exp \\left(o_{k}\\right)}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.9023em;vertical-align:-0.1944em;\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.0361em;vertical-align:-0.2861em;\"><\/span><span class=\"mop\"><span class=\"mord mathrm\">softmax<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\">o<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:1em;\"><\/span><span class=\"mord text\"><span class=\"mord\">&#xA0;<\/span><span class=\"mord cjk_fallback\">&#x5176;&#x4E2D;<\/span><span class=\"mord\">&#xA0;<\/span><\/span><span class=\"mspace\" style=\"margin-right:1em;\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.4127em;vertical-align:-0.9857em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1864em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" 
style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9857em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><\/span><br>\n&#x6700;&#x53EF;&#x80FD;&#x7684;&#x7C7B;&#x522B;&#x4E3A;<br>\n<span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi><munder><mo><mi mathvariant=\"normal\">argmax<\/mi><mo>&#x2061;<\/mo><\/mo><mi>j<\/mi><\/munder><\/mi><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><mo>=<\/mo><mi><munder><mo><mi mathvariant=\"normal\">argmax<\/mi><mo>&#x2061;<\/mo><\/mo><mi>j<\/mi><\/munder><\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\\underset{j}{\\operatorname{argmax}} \\hat{y}_{j}=\\underset{j}{\\operatorname{argmax}} o_{j}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1.7527em;vertical-align:-1.0582em;\"><\/span><span class=\"mord\"><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.4306em;\"><span style=\"top:-2.1779em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span><span class=\"mop\"><span class=\"mop\"><span class=\"mord mathrm\">argmax<\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.0582em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.4888em;vertical-align:-1.0582em;\"><\/span><span class=\"mord\"><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.4306em;\"><span style=\"top:-2.1779em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span><span class=\"mop\"><span class=\"mop\"><span class=\"mord mathrm\">argmax<\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.0582em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<h2 class=\"mume-header\" id=\"%E4%BA%A4%E5%8F%89%E7%86%B5%E6%8D%9F%E5%A4%B1\">&#x4EA4;&#x53C9;&#x71B5;&#x635F;&#x5931;<\/h2>\n\n<p>&#x8BBE;&#x7279;&#x5F81;&#x7EF4;&#x5EA6;&#xFF08;&#x8F93;&#x5165;&#x6570;&#x91CF;&#xFF09;&#x4E3A;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>d<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">d<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.6944em;\"><\/span><span class=\"mord mathnormal\">d<\/span><\/span><\/span><\/span>&#xFF0C;&#x6279;&#x91CF;&#x5927;&#x5C0F;&#x4E3A;<span class=\"katex\"><span 
class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>n<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">n<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.4306em;\"><\/span><span class=\"mord mathnormal\">n<\/span><\/span><\/span><\/span>&#xFF0C;&#x8F93;&#x51FA;&#x7C7B;&#x522B;&#x6570;&#x4E3A;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>q<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">q<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.625em;vertical-align:-0.1944em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span>&#xFF0C;&#x5219;&#x6839;&#x636E;&#x6700;&#x5C0F;&#x5316;&#x8D1F;&#x5BF9;&#x6570;&#x4F3C;&#x7136;&#xFF0C;&#x6709;&#xFF1A;<br>\n<span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mo>&#x2212;<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mi>P<\/mi><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">Y<\/mi><mo>&#x2223;<\/mo><mi mathvariant=\"bold\">X<\/mi><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>n<\/mi><\/munderover><mo>&#x2212;<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mi>P<\/mi><mrow><mo fence=\"true\">(<\/mo><msup><mi mathvariant=\"bold\">y<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo>&#x2223;<\/mo><msup><mi mathvariant=\"bold\">x<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo 
fence=\"true\">)<\/mo><\/mrow><mo>=<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>n<\/mi><\/munderover><mi>l<\/mi><mrow><mo fence=\"true\">(<\/mo><msup><mi mathvariant=\"bold\">y<\/mi><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo separator=\"true\">,<\/mo><msup><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><mrow><mo stretchy=\"false\">(<\/mo><mi>i<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><\/msup><mo fence=\"true\">)<\/mo><\/mrow><mo separator=\"true\">,<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">-\\log P(\\mathbf{Y} \\mid \\mathbf{X})=\\sum_{i=1}^{n}-\\log P\\left(\\mathbf{y}^{(i)} \\mid \\mathbf{x}^{(i)}\\right)=\\sum_{i=1}^{n} l\\left(\\mathbf{y}^{(i)}, \\hat{\\mathbf{y}}^{(i)}\\right),<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.02875em;\">Y<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathbf\">X<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" 
style=\"height:2.9291em;vertical-align:-1.2777em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6514em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.2777em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord\"><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" 
style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">&#x2223;<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\"><span class=\"mord mathbf\">x<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.9291em;vertical-align:-1.2777em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6514em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol 
large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">n<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.2777em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.01968em;\">l<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord\"><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.938em;\"><span style=\"top:-3.113em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mopen mtight\">(<\/span><span class=\"mord mathnormal mtight\">i<\/span><span class=\"mclose mtight\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mpunct\">,<\/span><\/span><\/span><\/span><\/span><br>\n&#x5BF9;&#x4E8E;&#x7279;&#x5B9A;&#x7684;&#x6807;&#x7B7E;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi mathvariant=\"bold\">y<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">\\mathbf{y}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.6389em;vertical-align:-0.1944em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><\/span><\/span>&#x548C;&#x6A21;&#x578B;&#x9884;&#x6D4B;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><\/mrow><annotation encoding=\"application\/x-tex\">\\hat{\\mathbf{y}}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.9023em;vertical-align:-0.1944em;\"><\/span><span class=\"mord accent\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#xFF0C;&#x635F;&#x5931;&#x51FD;&#x6570;&#x4E3A;<br>\n<span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>l<\/mi><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">y<\/mi><mo separator=\"true\">,<\/mo><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><mo>&#x2212;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>j<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><msub><mi>y<\/mi><mi>j<\/mi><\/msub><mi>log<\/mi><mo>&#x2061;<\/mo><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">l(\\mathbf{y}, \\hat{\\mathbf{y}})=-\\sum_{j=1}^{q} y_{j} \\log \\hat{y}_{j}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.01968em;\">l<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord accent\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:3.1123em;vertical-align:-1.4138em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span 
style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><br>\n&#x7531;&#x4E8E;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi mathvariant=\"bold\">y<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">\\mathbf{y}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.6389em;vertical-align:-0.1944em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><\/span><\/span>&#x72EC;&#x70ED;&#x7F16;&#x7801;&#xFF0C;&#x4E0A;&#x5F0F;&#x53F3;&#x8FB9;&#x53EA;&#x6709;&#x4E00;&#x9879;&#x975E;&#x96F6;&#xFF0C;&#x6B64;&#x9879;&#x5373;&#x4E3A;&#x9884;&#x6D4B;&#x6982;&#x7387;&#x7684;&#x8D1F;&#x5BF9;&#x6570;&#x3002;<\/p>\n<p><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mtable rowspacing=\"0.25em\" columnalign=\"right left\" columnspacing=\"0em\"><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mi>l<\/mi><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">y<\/mi><mo separator=\"true\">,<\/mo><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><mo stretchy=\"false\">)<\/mo><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" 
displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><mo>&#x2212;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>j<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><msub><mi>y<\/mi><mi>j<\/mi><\/msub><mi>log<\/mi><mo>&#x2061;<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munderover><mo>&#x2211;<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><\/mrow><\/mstyle><\/mtd><\/mtr><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>j<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><msub><mi>y<\/mi><mi>j<\/mi><\/msub><mi>log<\/mi><mo>&#x2061;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo>&#x2212;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>j<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><msub><mi>y<\/mi><mi>j<\/mi><\/msub><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><\/mstyle><\/mtd><\/mtr><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo 
fence=\"true\">)<\/mo><\/mrow><mo>&#x2212;<\/mo><munderover><mo>&#x2211;<\/mo><mrow><mi>j<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><msub><mi>y<\/mi><mi>j<\/mi><\/msub><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mi mathvariant=\"normal\">.<\/mi><\/mrow><\/mstyle><\/mtd><\/mtr><\/mtable><mspace linebreak=\"newline\"><\/mspace><msub><mi mathvariant=\"normal\">&#x2202;<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/msub><mi>l<\/mi><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">y<\/mi><mo separator=\"true\">,<\/mo><mover accent=\"true\"><mi mathvariant=\"bold\">y<\/mi><mo>^<\/mo><\/mover><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munderover><mo>&#x2211;<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>q<\/mi><\/munderover><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><mo>&#x2212;<\/mo><msub><mi>y<\/mi><mi>j<\/mi><\/msub><mo>=<\/mo><mi mathvariant=\"normal\">softmax<\/mi><mo>&#x2061;<\/mo><mo stretchy=\"false\">(<\/mo><mi mathvariant=\"bold\">o<\/mi><msub><mo stretchy=\"false\">)<\/mo><mi>j<\/mi><\/msub><mo>&#x2212;<\/mo><msub><mi>y<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\\begin{aligned}\nl(\\mathbf{y}, \\hat{\\mathbf{y}}) &amp;=-\\sum_{j=1}^{q} y_{j} \\log \\frac{\\exp \\left(o_{j}\\right)}{\\sum_{k=1}^{q} \\exp \\left(o_{k}\\right)} \\\\\n&amp;=\\sum_{j=1}^{q} y_{j} \\log \\sum_{k=1}^{q} \\exp \\left(o_{k}\\right)-\\sum_{j=1}^{q} y_{j} o_{j} \\\\\n&amp;=\\log \\sum_{k=1}^{q} \\exp \\left(o_{k}\\right)-\\sum_{j=1}^{q} y_{j} o_{j} .\n\\end{aligned}\n\\\\\n\\partial_{o_{j}} l(\\mathbf{y}, \\hat{\\mathbf{y}})=\\frac{\\exp \\left(o_{j}\\right)}{\\sum_{k=1}^{q} \\exp 
\\left(o_{k}\\right)}-y_{j}=\\operatorname{softmax}(\\mathbf{o})_{j}-y_{j}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:10.2368em;vertical-align:-4.8684em;\"><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-r\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:5.3684em;\"><span style=\"top:-7.3684em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.01968em;\">l<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><\/span><\/span><span style=\"top:-3.9561em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><\/span><\/span><span style=\"top:-0.5439em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:4.8684em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"col-align-l\"><span 
class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:5.3684em;\"><span style=\"top:-7.3684em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" 
style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.3057em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8043em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.2029em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span 
class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.994em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><span style=\"top:-3.9561em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span 
class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8479em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:1.3021em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span 
class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-0.5439em;\"><span class=\"pstrut\" style=\"height:3.6985em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" 
style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8479em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.3021em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.6985em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><span style=\"top:-4.3471em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span 
style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">.<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:4.8684em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace newline\"><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.0973em;vertical-align:-0.3473em;\"><\/span><span class=\"mord\"><span class=\"mord\" style=\"margin-right:0.05556em;\">&#x2202;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1514em;\"><span style=\"top:-2.55em;margin-left:-0.0556em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord 
mtight\"><span class=\"mord mathnormal mtight\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3281em;\"><span style=\"top:-2.357em;margin-left:0em;margin-right:0.0714em;\"><span class=\"pstrut\" style=\"height:2.5em;\"><\/span><span class=\"sizing reset-size3 size1 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2819em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3473em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.01968em;\">l<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7079em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathbf\" style=\"margin-right:0.01597em;\">y<\/span><\/span><span style=\"top:-3.0134em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" 
style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.421em;vertical-align:-0.994em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.3057em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.8043em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><span class=\"mrel mtight\">=<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><span style=\"top:-3.2029em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em;\">q<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span 
style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.994em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose 
nulldelimiter\"><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.7167em;vertical-align:-0.2861em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.0361em;vertical-align:-0.2861em;\"><\/span><span class=\"mop\"><span class=\"mord mathrm\">softmax<\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathbf\">o<\/span><span class=\"mclose\"><span class=\"mclose\">)<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.7167em;vertical-align:-0.2861em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<p>&#x4E00;&#x884C;&#x4EE3;&#x7801;&#x5B9E;&#x73B0;&#x4EA4;&#x53C9;&#x71B5;&#x51FD;&#x6570;&#xFF1A;<\/p>\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\"><span class=\"token keyword keyword-def\">def<\/span> <span class=\"token function\">cross_entropy<\/span><span class=\"token punctuation\">(<\/span>y_hat<span class=\"token punctuation\">,<\/span> y<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\n    <span class=\"token keyword keyword-return\">return<\/span> <span class=\"token operator\">-<\/span>torch<span class=\"token punctuation\">.<\/span>log<span class=\"token punctuation\">(<\/span>y_hat<span class=\"token punctuation\">[<\/span><span class=\"token builtin\">range<\/span><span class=\"token punctuation\">(<\/span><span class=\"token 
builtin\">len<\/span><span class=\"token punctuation\">(<\/span>y_hat<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span>y<span class=\"token punctuation\">]<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre><p>&#x5B9E;&#x9645;&#x5E94;&#x7528;&#x4E2D;&#xFF0C;&#x5728;&#x8BA1;&#x7B97;y_hat&#x65F6;&#xFF0C;&#x82E5;&#x4E00;&#x4E9B;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">o_j<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.7167em;vertical-align:-0.2861em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#x975E;&#x5E38;&#x5927;&#xFF0C;exp(o_j)&#x4F1A;&#x4E0A;&#x6EA2;&#x4E3A;inf&#xFF0C;&#x89E3;&#x51B3;&#x65B9;&#x6848;&#x662F;&#x5728;&#x8BA1;&#x7B97;softmax&#x524D;&#x5148;&#x4ECE;&#x6240;&#x6709;o_k&#x4E2D;&#x51CF;&#x53BB;max(o_k)&#xFF1A;<br>\n<span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mtable rowspacing=\"0.25em\" columnalign=\"right left\" columnspacing=\"0em\"><mtr><mtd><mstyle 
scriptlevel=\"0\" displaystyle=\"true\"><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munder><mo>&#x2211;<\/mo><mi>k<\/mi><\/munder><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><\/mrow><\/mstyle><\/mtd><\/mtr><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munder><mo>&#x2211;<\/mo><mi>k<\/mi><\/munder><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo 
fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><\/mrow><\/mstyle><\/mtd><\/mtr><\/mtable><annotation encoding=\"application\/x-tex\">\\begin{aligned}\n\\hat{y}_{j} &amp;=\\frac{\\exp \\left(o_{j}-\\max \\left(o_{k}\\right)\\right) \\exp \\left(\\max \\left(o_{k}\\right)\\right)}{\\sum_{k} \\exp \\left(o_{k}-\\max \\left(o_{k}\\right)\\right) \\exp \\left(\\max \\left(o_{k}\\right)\\right)} \\\\\n&amp;=\\frac{\\exp \\left(o_{j}-\\max \\left(o_{k}\\right)\\right)}{\\sum_{k} \\exp \\left(o_{k}-\\max \\left(o_{k}\\right)\\right)}\n\\end{aligned}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:5.4254em;vertical-align:-2.4627em;\"><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-r\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.9627em;\"><span style=\"top:-4.9627em;\"><span class=\"pstrut\" style=\"height:3.427em;\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-2.25em;\"><span class=\"pstrut\" style=\"height:3.427em;\"><\/span><span class=\"mord\"><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.4627em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.9627em;\"><span style=\"top:-4.9627em;\"><span class=\"pstrut\" style=\"height:3.427em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1864em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal 
mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" 
style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mspace\" 
style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9857em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><span style=\"top:-2.25em;\"><span class=\"pstrut\" style=\"height:3.427em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.314em;\"><span 
class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1864em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span 
class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9857em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.4627em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><br>\n&#x82E5;&#x4E00;&#x4E9B;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation 
encoding=\"application\/x-tex\">o_j<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.7167em;vertical-align:-0.2861em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#x975E;&#x5E38;&#x5C0F;&#xFF0C;&#x4F1A;&#x5BFC;&#x81F4;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\\hat{y}_j<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.9805em;vertical-align:-0.2861em;\"><\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span 
class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#x4E3A;&#x96F6;&#xFF0C;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>log<\/mi><mo>&#x2061;<\/mo><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\\log{\\hat{y}_j}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.9805em;vertical-align:-0.2861em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\"><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span>&#x503C;&#x4E3A;-inf&#xFF0C;&#x53CD;&#x5411;&#x4F20;&#x64AD;&#x540E;&#x4F1A;&#x51FA;&#x73B0;nan&#xFF0C;&#x89E3;&#x51B3;&#x65B9;&#x6848;&#x662F;&#x6309;&#x7167;&#x4E0B;&#x8FF0;&#x5316;&#x7B80;&#x5F0F;&#x8BA1;&#x7B97;&#x4EA4;&#x53C9;&#x71B5;&#x635F;&#x5931;&#x51FD;&#x6570;&#xFF1A;<br>\n<span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mtable rowspacing=\"0.25em\" columnalign=\"right left\" columnspacing=\"0em\"><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mi>log<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mover accent=\"true\"><mi>y<\/mi><mo>^<\/mo><\/mover><mi>j<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><mfrac><mrow><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo 
fence=\"true\">)<\/mo><\/mrow><\/mrow><mrow><munder><mo>&#x2211;<\/mo><mi>k<\/mi><\/munder><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mfrac><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mstyle><\/mtd><\/mtr><mtr><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><\/mrow><\/mstyle><\/mtd><mtd><mstyle scriptlevel=\"0\" displaystyle=\"true\"><mrow><mrow><\/mrow><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo>&#x2212;<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><munder><mo>&#x2211;<\/mo><mi>k<\/mi><\/munder><mi>exp<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>&#x2212;<\/mo><mi>max<\/mi><mo>&#x2061;<\/mo><mrow><mo fence=\"true\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><\/mstyle><\/mtd><\/mtr><\/mtable><annotation encoding=\"application\/x-tex\">\\begin{aligned}\n\\log \\left(\\hat{y}_{j}\\right) &amp;=\\log \\left(\\frac{\\exp \\left(o_{j}-\\max \\left(o_{k}\\right)\\right)}{\\sum_{k} \\exp \\left(o_{k}-\\max \\left(o_{k}\\right)\\right)}\\right) \\\\\n&amp;=o_{j}-\\max \\left(o_{k}\\right)-\\log \\left(\\sum_{k} \\exp \\left(o_{k}-\\max \\left(o_{k}\\right)\\right)\\right)\n\\end{aligned}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:6.0878em;vertical-align:-2.7939em;\"><\/span><span class=\"mord\"><span class=\"mtable\"><span class=\"col-align-r\"><span class=\"vlist-t 
vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:3.2939em;\"><span style=\"top:-5.5939em;\"><span class=\"pstrut\" style=\"height:3.75em;\"><\/span><span class=\"mord\"><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6944em;\"><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03588em;\">y<\/span><\/span><span style=\"top:-3em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"accent-body\" style=\"left:-0.1944em;\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1944em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span style=\"top:-2.5582em;\"><span class=\"pstrut\" style=\"height:3.75em;\"><\/span><span class=\"mord\"><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span 
class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.7939em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"col-align-l\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:3.2939em;\"><span style=\"top:-5.5939em;\"><span class=\"pstrut\" style=\"height:3.75em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size3\">(<\/span><\/span><span class=\"mord\"><span class=\"mopen nulldelimiter\"><\/span><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.427em;\"><span style=\"top:-2.314em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:0em;\">&#x2211;<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1864em;\"><span style=\"top:-2.4003em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2997em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" 
style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><span 
style=\"top:-3.23em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"frac-line\" style=\"border-bottom-width:0.04em;\"><\/span><\/span><span style=\"top:-3.677em;\"><span class=\"pstrut\" style=\"height:3em;\"><\/span><span class=\"mord\"><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span 
class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9857em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size3\">)<\/span><\/span><\/span><\/span><\/span><span style=\"top:-2.5582em;\"><span class=\"pstrut\" style=\"height:3.75em;\"><\/span><span class=\"mord\"><span class=\"mord\"><\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3117em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2861em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" 
style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size4\">(<\/span><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.05em;\"><span style=\"top:-1.8479em;margin-left:0em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.3021em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" 
style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">exp<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mbin\">&#x2212;<\/span><span class=\"mspace\" style=\"margin-right:0.2222em;\"><\/span><span class=\"mop\">max<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em;\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">o<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.3361em;\"><span style=\"top:-2.55em;margin-left:0em;margin-right:0.05em;\"><span class=\"pstrut\" style=\"height:2.7em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em;\">k<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span 
class=\"mclose delimcenter\" style=\"top:0em;\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em;\"><span class=\"delimsizing size4\">)<\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:2.7939em;\"><span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/p>\n<h2 class=\"mume-header\" id=\"%E4%BF%A1%E6%81%AF%E7%86%B5\">&#x4FE1;&#x606F;&#x71B5;<\/h2>\n\n<p><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>H<\/mi><mo stretchy=\"false\">[<\/mo><mi>P<\/mi><mo stretchy=\"false\">]<\/mo><mo>=<\/mo><munder><mo>&#x2211;<\/mo><mi>j<\/mi><\/munder><mo>&#x2212;<\/mo><mi>P<\/mi><mo stretchy=\"false\">(<\/mo><mi>j<\/mi><mo stretchy=\"false\">)<\/mo><mi>log<\/mi><mo>&#x2061;<\/mo><mi>P<\/mi><mo stretchy=\"false\">(<\/mo><mi>j<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">H[P]=\\sum_{j}-P(j) \\log P(j)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.08125em;\">H<\/span><span class=\"mopen\">[<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mclose\">]<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2778em;\"><\/span><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.4638em;vertical-align:-1.4138em;\"><\/span><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.05em;\"><span style=\"top:-1.8723em;margin-left:0em;\"><span 
class=\"pstrut\" style=\"height:3.05em;\"><\/span><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.05724em;\">j<\/span><\/span><\/span><\/span><span style=\"top:-3.05em;\"><span class=\"pstrut\" style=\"height:3.05em;\"><\/span><span><span class=\"mop op-symbol large-op\">&#x2211;<\/span><\/span><\/span><\/span><span class=\"vlist-s\">&#x200B;<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.4138em;\"><span><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord\">&#x2212;<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mop\">lo<span style=\"margin-right:0.01389em;\">g<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.05724em;\">j<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/p>\n<h3 class=\"mume-header\" id=\"%E7%90%86%E8%A7%A3\">&#x7406;&#x89E3;<\/h3>\n\n<ul>\n<li>&#x53EF;&#x4EE5;&#x628A;&#x4FE1;&#x606F;&#x71B5;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>H<\/mi><mo stretchy=\"false\">(<\/mo><mi>P<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">H(P)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" 
style=\"margin-right:0.08125em;\">H<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span>&#x7406;&#x89E3;&#x4E3A;&#x201C;<strong>&#x77E5;&#x9053;&#x771F;&#x5B9E;&#x6982;&#x7387;&#x7684;&#x4EBA;&#x6240;&#x7ECF;&#x5386;&#x7684;&#x60CA;&#x5F02;&#x7A0B;&#x5EA6;<\/strong>&#x201D;<\/li>\n<li>&#x53EF;&#x4EE5;&#x628A;&#x4EA4;&#x53C9;&#x71B5;<span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>H<\/mi><mo stretchy=\"false\">(<\/mo><mi>P<\/mi><mo separator=\"true\">,<\/mo><mi>Q<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">H(P,Q)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em;\"><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.08125em;\">H<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.13889em;\">P<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.1667em;\"><\/span><span class=\"mord mathnormal\">Q<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span>&#x7406;&#x89E3;&#x4E3A;&#x201C;<strong>&#x4E3B;&#x89C2;&#x6982;&#x7387;&#x4E3A;Q&#x7684;&#x89C2;&#x5BDF;&#x8005;&#x5728;&#x770B;&#x5230;&#x6839;&#x636E;&#x6982;&#x7387;P&#x751F;&#x6210;&#x7684;&#x6570;&#x636E;&#x65F6;&#x7684;&#x9884;&#x671F;&#x60CA;&#x5F02;<\/strong>&#x201D;<\/li>\n<\/ul>\n<h3 class=\"mume-header\" 
id=\"%E4%BA%A4%E5%8F%89%E7%86%B5%E5%88%86%E7%B1%BB%E7%9B%AE%E6%A0%87\">&#x4EA4;&#x53C9;&#x71B5;&#x5206;&#x7C7B;&#x76EE;&#x6807;<\/h3>\n\n<p>&#x6700;&#x5927;&#x5316;&#x89C2;&#x6D4B;&#x6570;&#x636E;&#x7684;&#x4F3C;&#x7136;&#xFF0C;&#x4EA6;&#x5373;&#xFF0C;&#x6700;&#x5C0F;&#x5316;&#x4F20;&#x8FBE;&#x6807;&#x7B7E;&#x6240;&#x9700;&#x7684;&#x60CA;&#x5F02;<\/p>\n<blockquote>\n<p>&#x5982;&#x65E0;&#x5FC5;&#x8981;&#xFF0C;&#x52FF;&#x589E;&#x5B9E;&#x4F53;&#x3002;<br>\n&#x77E5;&#x4E4B;&#x4E3A;&#x77E5;&#x4E4B;&#xFF0C;&#x4E0D;&#x77E5;&#x4E3A;&#x4E0D;&#x77E5;&#xFF0C;&#x662F;&#x77E5;&#x4E5F;&#x3002;<\/p>\n<\/blockquote>\n<h2 class=\"mume-header\" id=\"%E6%A8%A1%E5%9E%8B%E9%A2%84%E6%B5%8B%E5%92%8C%E8%AF%84%E4%BC%B0\">&#x6A21;&#x578B;&#x9884;&#x6D4B;&#x548C;&#x8BC4;&#x4F30;<\/h2>\n\n<p>&#x4F7F;&#x7528;&#x7CBE;&#x5EA6;&#xFF08;accuracy&#xFF09;&#x6765;&#x8BC4;&#x4F30;&#x6A21;&#x578B;&#x6027;&#x80FD;&#x3002;<br>\n&#x7CBE;&#x5EA6;&#x7B49;&#x4E8E;&#x6B63;&#x786E;&#x9884;&#x6D4B;&#x6570;&#x5360;&#x9884;&#x6D4B;&#x603B;&#x6570;&#x7684;&#x6BD4;&#x7387;&#x3002;<\/p>\n<h2 class=\"mume-header\" id=\"softmax%E5%9B%9E%E5%BD%92%E7%9A%84%E7%AE%80%E6%B4%81%E5%AE%9E%E7%8E%B0\">softmax&#x56DE;&#x5F52;&#x7684;&#x7B80;&#x6D01;&#x5B9E;&#x73B0;<\/h2>\n\n<h3 class=\"mume-header\" id=\"%E5%88%9D%E5%A7%8B%E5%8C%96%E6%A8%A1%E5%9E%8B%E5%8F%82%E6%95%B0-1\">&#x521D;&#x59CB;&#x5316;&#x6A21;&#x578B;&#x53C2;&#x6570;<\/h3>\n\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\">net <span class=\"token operator\">=<\/span> nn<span class=\"token punctuation\">.<\/span>Sequential<span class=\"token punctuation\">(<\/span>nn<span class=\"token punctuation\">.<\/span>Flatten<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> nn<span class=\"token punctuation\">.<\/span>Linear<span class=\"token punctuation\">(<\/span><span class=\"token number\">784<\/span><span class=\"token punctuation\">,<\/span><span class=\"token 
number\">10<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">;<\/span>\n\n<span class=\"token keyword keyword-def\">def<\/span> <span class=\"token function\">init_weights<\/span><span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\n    <span class=\"token keyword keyword-if\">if<\/span> <span class=\"token builtin\">isinstance<\/span><span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">,<\/span> nn<span class=\"token punctuation\">.<\/span>Linear<span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">:<\/span>\n        torch<span class=\"token punctuation\">.<\/span>nn<span class=\"token punctuation\">.<\/span>init<span class=\"token punctuation\">.<\/span>normal_<span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">.<\/span>weight<span class=\"token punctuation\">,<\/span> mean<span class=\"token operator\">=<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\">,<\/span> std<span class=\"token operator\">=<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\">)<\/span>\n        torch<span class=\"token punctuation\">.<\/span>nn<span class=\"token punctuation\">.<\/span>init<span class=\"token punctuation\">.<\/span>constant_<span class=\"token punctuation\">(<\/span>m<span class=\"token punctuation\">.<\/span>bias<span class=\"token punctuation\">,<\/span> <span class=\"token number\">0<\/span><span class=\"token punctuation\">)<\/span>\n\nnet<span class=\"token punctuation\">.<\/span><span class=\"token builtin\">apply<\/span><span class=\"token punctuation\">(<\/span>init_weights<span class=\"token punctuation\">)<\/span>\n<\/pre><h3 class=\"mume-header\" 
id=\"%E5%AE%9A%E4%B9%89%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0%E4%B8%8E%E4%BC%98%E5%8C%96%E7%AE%97%E6%B3%95\">&#x5B9A;&#x4E49;&#x635F;&#x5931;&#x51FD;&#x6570;&#x4E0E;&#x4F18;&#x5316;&#x7B97;&#x6CD5;<\/h3>\n\n<pre data-role=\"codeBlock\" data-info=\"python\" class=\"language-python\">loss <span class=\"token operator\">=<\/span> nn<span class=\"token punctuation\">.<\/span>CrossEntropyLoss<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span>\ntrainer <span class=\"token operator\">=<\/span> torch<span class=\"token punctuation\">.<\/span>optim<span class=\"token punctuation\">.<\/span>SGD<span class=\"token punctuation\">(<\/span>net<span class=\"token punctuation\">.<\/span>parameters<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)<\/span><span class=\"token punctuation\">,<\/span> lr<span class=\"token operator\">=<\/span><span class=\"token number\">0.1<\/span><span class=\"token punctuation\">)<\/span>\n<\/pre>\n      <\/div>\n      \n","protected":false},"excerpt":{"rendered":"<p>\u8be5\u7ae0\u4e3b\u8981\u5305\u62ec\u7ebf\u6027\u56de\u5f52\u4e0esoftmax\u56de\u5f52\u3002 \u300a\u52a8\u624b\u5b66\u6df1\u5ea6\u5b66\u4e60\u300b \u2014 \u52a8\u624b\u5b66\u6df1\u5ea6\u5b66\u4e60 2.0.0-beta0 d &hellip; <a href=\"https:\/\/www.lazybirds.top\/?p=335\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u201cd2l\u81ea\u5b66\u7b14\u8bb0 &#8211; 
3.\u7ebf\u6027\u795e\u7ecf\u7f51\u7edc\u201d<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[31,20],"tags":[32,33,26,25],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_likes_enabled":true,"_links":{"self":[{"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/posts\/335"}],"collection":[{"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=335"}],"version-history":[{"count":1,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/posts\/335\/revisions"}],"predecessor-version":[{"id":336,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=\/wp\/v2\/posts\/335\/revisions\/336"}],"wp:attachment":[{"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=335"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=335"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.lazybirds.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=335"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}