chore: 添加Stock-Prediction-Models项目文件
添加了Stock-Prediction-Models项目的多个文件,包括数据集、模型代码、README文档和CSS样式文件。这些文件用于股票预测模型的训练和展示,涵盖了LSTM、GRU等深度学习模型的应用。
This commit is contained in:
+682
File diff suppressed because one or more lines are too long
+680
File diff suppressed because one or more lines are too long
+736
File diff suppressed because one or more lines are too long
+721
File diff suppressed because one or more lines are too long
+687
File diff suppressed because one or more lines are too long
+726
File diff suppressed because one or more lines are too long
+704
File diff suppressed because one or more lines are too long
+717
File diff suppressed because one or more lines are too long
+718
File diff suppressed because one or more lines are too long
+706
File diff suppressed because one or more lines are too long
+705
File diff suppressed because one or more lines are too long
+759
File diff suppressed because one or more lines are too long
+675
File diff suppressed because one or more lines are too long
+704
File diff suppressed because one or more lines are too long
+742
File diff suppressed because one or more lines are too long
+672
File diff suppressed because one or more lines are too long
+701
File diff suppressed because one or more lines are too long
+739
File diff suppressed because one or more lines are too long
+318
@@ -0,0 +1,318 @@
|
||||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""DNC access modules."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import sonnet as snt
|
||||
import tensorflow as tf
|
||||
|
||||
import addressing
|
||||
import util
|
||||
|
||||
# State tuple passed to and returned from `MemoryAccess` at each time step.
AccessState = collections.namedtuple('AccessState', (
    'memory', 'read_weights', 'write_weights', 'linkage', 'usage'))
|
||||
|
||||
|
||||
def _erase_and_write(memory, address, reset_weights, values):
  """Erases from, and then additively writes to, the external memory.

  Erase operation:
    M_t'(i) = M_{t-1}(i) * (1 - w_t(i) * e_t)

  Add operation:
    M_t(i) = M_t'(i) + w_t(i) * a_t

  where e are the reset_weights, w the write weights and a the values.

  Args:
    memory: 3-D tensor of shape `[batch_size, memory_size, word_size]`.
    address: 3-D tensor `[batch_size, num_writes, memory_size]`.
    reset_weights: 3-D tensor `[batch_size, num_writes, word_size]`.
    values: 3-D tensor `[batch_size, num_writes, word_size]`.

  Returns:
    The updated memory, a 3-D tensor of shape
    `[batch_size, memory_size, word_size]`.
  """
  with tf.name_scope('erase_memory', values=[memory, address, reset_weights]):
    # Broadcast [batch, writes, memory, 1] against [batch, writes, 1, word] to
    # get an erase amount per write head, memory slot and word element.
    expand_address = tf.expand_dims(address, 3)
    reset_weights = tf.expand_dims(reset_weights, 2)
    weighted_resets = expand_address * reset_weights
    # Combine the write heads by multiplying their retention factors.
    reset_gate = tf.reduce_prod(1 - weighted_resets, [1])
    memory *= reset_gate

  with tf.name_scope('additive_write', values=[memory, address, values]):
    # address^T x values sums the contributions of all write heads.
    add_matrix = tf.matmul(address, values, adjoint_a=True)
    memory += add_matrix

  return memory
|
||||
|
||||
|
||||
class MemoryAccess(snt.RNNCore):
  """Access module of the Differentiable Neural Computer.

  This memory module supports multiple read and write heads. It makes use of:

  *   `addressing.TemporalLinkage` to track the temporal ordering of writes in
      memory for each write head.
  *   `addressing.Freeness` for keeping track of memory usage, where
      usage increases when a memory location is written to, and decreases when
      memory is read from that the controller says can be freed.

  Write-address selection is done by an interpolation between content-based
  lookup and using unused memory.

  Read-address selection is done by an interpolation of content-based lookup
  and following the link graph in the forward or backwards read direction.
  """

  def __init__(self,
               memory_size=128,
               word_size=20,
               num_reads=1,
               num_writes=1,
               name='memory_access'):
    """Creates a MemoryAccess module.

    Args:
      memory_size: The number of memory slots (N in the DNC paper).
      word_size: The width of each memory slot (W in the DNC paper).
      num_reads: The number of read heads (R in the DNC paper).
      num_writes: The number of write heads (fixed at 1 in the paper).
      name: The name of the module.
    """
    super(MemoryAccess, self).__init__(name=name)
    self._memory_size = memory_size
    self._word_size = word_size
    self._num_reads = num_reads
    self._num_writes = num_writes

    self._write_content_weights_mod = addressing.CosineWeights(
        num_writes, word_size, name='write_content_weights')
    self._read_content_weights_mod = addressing.CosineWeights(
        num_reads, word_size, name='read_content_weights')

    self._linkage = addressing.TemporalLinkage(memory_size, num_writes)
    self._freeness = addressing.Freeness(memory_size)

  def _build(self, inputs, prev_state):
    """Connects the MemoryAccess module into the graph.

    Args:
      inputs: tensor of shape `[batch_size, input_size]`. This is used to
          control this access module.
      prev_state: Instance of `AccessState` containing the previous state.

    Returns:
      A tuple `(output, next_state)`, where `output` is a tensor of shape
      `[batch_size, num_reads, word_size]`, and `next_state` is the new
      `AccessState` named tuple at the current time t.
    """
    inputs = self._read_inputs(inputs)

    # Update usage using inputs['free_gate'] and previous read & write weights.
    usage = self._freeness(
        write_weights=prev_state.write_weights,
        free_gate=inputs['free_gate'],
        read_weights=prev_state.read_weights,
        prev_usage=prev_state.usage)

    # Write to memory.
    write_weights = self._write_weights(inputs, prev_state.memory, usage)
    memory = _erase_and_write(
        prev_state.memory,
        address=write_weights,
        reset_weights=inputs['erase_vectors'],
        values=inputs['write_vectors'])

    linkage_state = self._linkage(write_weights, prev_state.linkage)

    # Read from memory (after the write, so reads see the updated contents).
    read_weights = self._read_weights(
        inputs,
        memory=memory,
        prev_read_weights=prev_state.read_weights,
        link=linkage_state.link)
    read_words = tf.matmul(read_weights, memory)

    return (read_words, AccessState(
        memory=memory,
        read_weights=read_weights,
        write_weights=write_weights,
        linkage=linkage_state,
        usage=usage))

  def _read_inputs(self, inputs):
    """Applies transformations to `inputs` to get control for this module.

    Args:
      inputs: tensor used to control this access module; it is fed through
          one `snt.Linear` per control signal.

    Returns:
      A dict mapping control names (e.g. 'write_vectors', 'free_gate',
      'read_mode') to the corresponding tensors.
    """

    def _linear(first_dim, second_dim, name, activation=None):
      """Returns a linear transformation of `inputs`, followed by a reshape."""
      linear = snt.Linear(first_dim * second_dim, name=name)(inputs)
      if activation is not None:
        linear = activation(linear, name=name + '_activation')
      return tf.reshape(linear, [-1, first_dim, second_dim])

    # v_t^i - The vectors to write to memory, for each write head `i`.
    write_vectors = _linear(self._num_writes, self._word_size, 'write_vectors')

    # e_t^i - Amount to erase the memory by before writing, for each write head.
    erase_vectors = _linear(self._num_writes, self._word_size, 'erase_vectors',
                            tf.sigmoid)

    # f_t^j - Amount that the memory at the locations read from at the previous
    # time step can be declared unused, for each read head `j`.
    free_gate = tf.sigmoid(
        snt.Linear(self._num_reads, name='free_gate')(inputs))

    # g_t^{a, i} - Interpolation between writing to unallocated memory and
    # content-based lookup, for each write head `i`. Note: `a` is simply used to
    # identify this gate with allocation vs writing (as defined below).
    allocation_gate = tf.sigmoid(
        snt.Linear(self._num_writes, name='allocation_gate')(inputs))

    # g_t^{w, i} - Overall gating of write amount for each write head.
    write_gate = tf.sigmoid(
        snt.Linear(self._num_writes, name='write_gate')(inputs))

    # \pi_t^j - Mixing between "backwards" and "forwards" positions (for
    # each write head), and content-based lookup, for each read head.
    num_read_modes = 1 + 2 * self._num_writes
    read_mode = snt.BatchApply(tf.nn.softmax)(
        _linear(self._num_reads, num_read_modes, name='read_mode'))

    # Parameters for the (read / write) "weights by content matching" modules.
    write_keys = _linear(self._num_writes, self._word_size, 'write_keys')
    write_strengths = snt.Linear(self._num_writes, name='write_strengths')(
        inputs)

    read_keys = _linear(self._num_reads, self._word_size, 'read_keys')
    read_strengths = snt.Linear(self._num_reads, name='read_strengths')(inputs)

    result = {
        'read_content_keys': read_keys,
        'read_content_strengths': read_strengths,
        'write_content_keys': write_keys,
        'write_content_strengths': write_strengths,
        'write_vectors': write_vectors,
        'erase_vectors': erase_vectors,
        'free_gate': free_gate,
        'allocation_gate': allocation_gate,
        'write_gate': write_gate,
        'read_mode': read_mode,
    }
    return result

  def _write_weights(self, inputs, memory, usage):
    """Calculates the memory locations to write to.

    This uses a combination of content-based lookup and finding an unused
    location in memory, for each write head.

    Args:
      inputs: Collection of inputs to the access module, including controls for
          how to choose memory writing, such as the content to look-up and the
          weighting between content-based and allocation-based addressing.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents.
      usage: Current memory usage, which is a tensor of shape `[batch_size,
          memory_size]`, used for allocation-based addressing.

    Returns:
      tensor of shape `[batch_size, num_writes, memory_size]` indicating where
          to write to (if anywhere) for each write head.
    """
    with tf.name_scope('write_weights', values=[inputs, memory, usage]):
      # c_t^{w, i} - The content-based weights for each write head.
      write_content_weights = self._write_content_weights_mod(
          memory, inputs['write_content_keys'],
          inputs['write_content_strengths'])

      # a_t^i - The allocation weights for each write head.
      write_allocation_weights = self._freeness.write_allocation_weights(
          usage=usage,
          write_gates=(inputs['allocation_gate'] * inputs['write_gate']),
          num_writes=self._num_writes)

      # Expands gates over memory locations.
      allocation_gate = tf.expand_dims(inputs['allocation_gate'], -1)
      write_gate = tf.expand_dims(inputs['write_gate'], -1)

      # w_t^{w, i} - The write weightings for each write head.
      return write_gate * (allocation_gate * write_allocation_weights +
                           (1 - allocation_gate) * write_content_weights)

  def _read_weights(self, inputs, memory, prev_read_weights, link):
    """Calculates read weights for each read head.

    The read weights are a combination of following the link graphs in the
    forward or backward directions from the previous read position, and doing
    content-based lookup. The interpolation between these different modes is
    done by `inputs['read_mode']`.

    Args:
      inputs: Controls for this access module. This contains the content-based
          keys to lookup, and the weightings for the different read modes.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents to do content-based lookup.
      prev_read_weights: A tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read locations.
      link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` containing the temporal write transition graphs.

    Returns:
      A tensor of shape `[batch_size, num_reads, memory_size]` containing the
      read weights for each read head.
    """
    with tf.name_scope(
        'read_weights', values=[inputs, memory, prev_read_weights, link]):
      # c_t^{r, i} - The content weightings for each read head.
      content_weights = self._read_content_weights_mod(
          memory, inputs['read_content_keys'], inputs['read_content_strengths'])

      # Calculates f_t^i and b_t^i.
      forward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=True)
      backward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=False)

      # Layout of the last axis of `read_mode`:
      # [backward (num_writes), forward (num_writes), content (1)].
      backward_mode = inputs['read_mode'][:, :, :self._num_writes]
      forward_mode = (
          inputs['read_mode'][:, :, self._num_writes:2 * self._num_writes])
      content_mode = inputs['read_mode'][:, :, 2 * self._num_writes]

      read_weights = (
          tf.expand_dims(content_mode, 2) * content_weights + tf.reduce_sum(
              tf.expand_dims(forward_mode, 3) * forward_weights, 2) +
          tf.reduce_sum(tf.expand_dims(backward_mode, 3) * backward_weights, 2))

      return read_weights

  @property
  def state_size(self):
    """Returns a tuple of the shape of the state tensors."""
    return AccessState(
        memory=tf.TensorShape([self._memory_size, self._word_size]),
        read_weights=tf.TensorShape([self._num_reads, self._memory_size]),
        write_weights=tf.TensorShape([self._num_writes, self._memory_size]),
        linkage=self._linkage.state_size,
        usage=self._freeness.state_size)

  @property
  def output_size(self):
    """Returns the output shape."""
    return tf.TensorShape([self._num_reads, self._word_size])
|
||||
+410
@@ -0,0 +1,410 @@
|
||||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""DNC addressing modules."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import sonnet as snt
|
||||
import tensorflow as tf
|
||||
|
||||
import util
|
||||
|
||||
# Ensure values are greater than epsilon to avoid numerical instability.
|
||||
_EPSILON = 1e-6
|
||||
|
||||
TemporalLinkageState = collections.namedtuple('TemporalLinkageState',
|
||||
('link', 'precedence_weights'))
|
||||
|
||||
|
||||
def _vector_norms(m):
  """Returns epsilon-smoothed Euclidean norms of `m` along axis 2.

  Args:
    m: A tensor with at least 3 dimensions; the reduction is over axis 2,
        which is kept with size 1.

  Returns:
    `sqrt(sum(m * m, axis=2) + _EPSILON)`, with the reduced axis retained.
  """
  squared_norms = tf.reduce_sum(m * m, axis=2, keep_dims=True)
  # Adding _EPSILON under the root keeps the norm strictly positive.
  return tf.sqrt(squared_norms + _EPSILON)
|
||||
|
||||
|
||||
def weighted_softmax(activations, strengths, strengths_op):
  """Returns softmax over activations multiplied by positive strengths.

  Args:
    activations: A tensor of shape `[batch_size, num_heads, memory_size]`, of
      activations to be transformed. Softmax is taken over the last dimension.
    strengths: A tensor of shape `[batch_size, num_heads]` containing strengths
      to multiply by the activations prior to the softmax.
    strengths_op: An operation to transform strengths before softmax.

  Returns:
    A tensor of same shape as `activations` with weighted softmax applied.
  """
  # Add a trailing axis so each head's strength broadcasts over memory slots.
  transformed_strengths = tf.expand_dims(strengths_op(strengths), -1)
  sharp_activations = activations * transformed_strengths
  softmax = snt.BatchApply(module_or_op=tf.nn.softmax)
  return softmax(sharp_activations)
|
||||
|
||||
|
||||
class CosineWeights(snt.AbstractModule):
  """Cosine-weighted attention.

  Calculates the cosine similarity between a query and each word in memory, then
  applies a weighted softmax to return a sharp distribution.
  """

  def __init__(self,
               num_heads,
               word_size,
               strength_op=tf.nn.softplus,
               name='cosine_weights'):
    """Initializes the CosineWeights module.

    Args:
      num_heads: number of memory heads.
      word_size: memory word size.
      strength_op: operation to apply to strengths (default is tf.nn.softplus).
      name: module name (default 'cosine_weights')
    """
    super(CosineWeights, self).__init__(name=name)
    self._num_heads = num_heads
    self._word_size = word_size
    self._strength_op = strength_op

  def _build(self, memory, keys, strengths):
    """Connects the CosineWeights module into the graph.

    Args:
      memory: A 3-D tensor of shape `[batch_size, memory_size, word_size]`.
      keys: A 3-D tensor of shape `[batch_size, num_heads, word_size]`.
      strengths: A 2-D tensor of shape `[batch_size, num_heads]`.

    Returns:
      Weights tensor of shape `[batch_size, num_heads, memory_size]`.
    """
    # Calculates the inner product between the query vector and words in memory.
    dot = tf.matmul(keys, memory, adjoint_b=True)

    # Outer product to compute denominator (euclidean norm of query and memory).
    memory_norms = _vector_norms(memory)
    key_norms = _vector_norms(keys)
    norm = tf.matmul(key_norms, memory_norms, adjoint_b=True)

    # Calculates cosine similarity between the query vector and words in memory.
    similarity = dot / (norm + _EPSILON)

    return weighted_softmax(similarity, strengths, self._strength_op)
|
||||
|
||||
|
||||
class TemporalLinkage(snt.RNNCore):
  """Keeps track of write order for forward and backward addressing.

  This is a pseudo-RNNCore module, whose state is a pair `(link,
  precedence_weights)`, where `link` is a (collection of) graphs for (possibly
  multiple) write heads (represented by a tensor with values in the range
  [0, 1]), and `precedence_weights` records the "previous write locations" used
  to build the link graphs.

  The function `directional_read_weights` computes addresses following the
  forward and backward directions in the link graphs.
  """

  def __init__(self, memory_size, num_writes, name='temporal_linkage'):
    """Construct a TemporalLinkage module.

    Args:
      memory_size: The number of memory slots.
      num_writes: The number of write heads.
      name: Name of the module.
    """
    super(TemporalLinkage, self).__init__(name=name)
    self._memory_size = memory_size
    self._num_writes = num_writes

  def _build(self, write_weights, prev_state):
    """Calculate the updated linkage state given the write weights.

    Args:
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the memory addresses of the different write heads.
      prev_state: `TemporalLinkageState` tuple containing a tensor `link` of
          shape `[batch_size, num_writes, memory_size, memory_size]`, and a
          tensor `precedence_weights` of shape `[batch_size, num_writes,
          memory_size]` containing the aggregated history of recent writes.

    Returns:
      A `TemporalLinkageState` tuple `next_state`, which contains the updated
      link and precedence weights.
    """
    # Both updates are computed from the *previous* precedence weights.
    link = self._link(prev_state.link, prev_state.precedence_weights,
                      write_weights)
    precedence_weights = self._precedence_weights(prev_state.precedence_weights,
                                                  write_weights)
    return TemporalLinkageState(
        link=link, precedence_weights=precedence_weights)

  def directional_read_weights(self, link, prev_read_weights, forward):
    """Calculates the forward or the backward read weights.

    For each read head (at a given address), there are `num_writes` link graphs
    to follow. Thus this function computes a read address for each of the
    `num_reads * num_writes` pairs of read and write heads.

    Args:
      link: tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` representing the link graphs L_t.
      prev_read_weights: tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read weights w_{t-1}^r.
      forward: Boolean indicating whether to follow the "future" direction in
          the link graph (True) or the "past" direction (False).

    Returns:
      tensor of shape `[batch_size, num_reads, num_writes, memory_size]`
    """
    with tf.name_scope('directional_read_weights'):
      # We calculate the forward and backward directions for each pair of
      # read and write heads; hence we need to tile the read weights and do a
      # sort of "outer product" to get this.
      expanded_read_weights = tf.stack([prev_read_weights] * self._num_writes,
                                       1)
      result = tf.matmul(expanded_read_weights, link, adjoint_b=forward)
      # Swap dimensions 1, 2 so order is [batch, reads, writes, memory]:
      return tf.transpose(result, perm=[0, 2, 1, 3])

  def _link(self, prev_link, prev_precedence_weights, write_weights):
    """Calculates the new link graphs.

    For each write head, the link is a directed graph (represented by a matrix
    with entries in range [0, 1]) whose vertices are the memory locations, and
    an edge indicates temporal ordering of writes.

    Args:
      prev_link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` representing the previous link graphs for each write
          head.
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` which is the previous "aggregated" write weights for
          each write head.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new locations in memory written to.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size, memory_size]`
      containing the new link graphs for each write head.
    """
    with tf.name_scope('link'):
      batch_size = prev_link.get_shape()[0].value
      if batch_size is None:
        # The batch dimension is not statically known; use the runtime shape
        # so the zero diagonal below can still be constructed.
        batch_size = tf.shape(prev_link)[0]
      write_weights_i = tf.expand_dims(write_weights, 3)
      write_weights_j = tf.expand_dims(write_weights, 2)
      prev_precedence_weights_j = tf.expand_dims(prev_precedence_weights, 2)
      prev_link_scale = 1 - write_weights_i - write_weights_j
      new_link = write_weights_i * prev_precedence_weights_j
      link = prev_link_scale * prev_link + new_link
      # Return the link with the diagonal set to zero, to remove self-looping
      # edges.
      return tf.matrix_set_diag(
          link,
          tf.zeros(
              [batch_size, self._num_writes, self._memory_size],
              dtype=link.dtype))

  def _precedence_weights(self, prev_precedence_weights, write_weights):
    """Calculates the new precedence weights given the current write weights.

    The precedence weights are the "aggregated write weights" for each write
    head, where write weights with sum close to zero will leave the precedence
    weights unchanged, but with sum close to one will replace the precedence
    weights.

    Args:
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` containing the previous precedence weights.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new write weights.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size]` containing the
      new precedence weights.
    """
    with tf.name_scope('precedence_weights'):
      write_sum = tf.reduce_sum(write_weights, 2, keep_dims=True)
      return (1 - write_sum) * prev_precedence_weights + write_weights

  @property
  def state_size(self):
    """Returns a `TemporalLinkageState` tuple of the state tensors' shapes."""
    return TemporalLinkageState(
        link=tf.TensorShape(
            [self._num_writes, self._memory_size, self._memory_size]),
        precedence_weights=tf.TensorShape([self._num_writes,
                                           self._memory_size]),)
|
||||
|
||||
|
||||
class Freeness(snt.RNNCore):
|
||||
"""Memory usage that is increased by writing and decreased by reading.
|
||||
|
||||
This module is a pseudo-RNNCore whose state is a tensor with values in
|
||||
the range [0, 1] indicating the usage of each of `memory_size` memory slots.
|
||||
|
||||
The usage is:
|
||||
|
||||
* Increased by writing, where usage is increased towards 1 at the write
|
||||
addresses.
|
||||
* Decreased by reading, where usage is decreased after reading from a
|
||||
location when free_gate is close to 1.
|
||||
|
||||
The function `write_allocation_weights` can be invoked to get free locations
|
||||
to write to for a number of write heads.
|
||||
"""
|
||||
|
||||
def __init__(self, memory_size, name='freeness'):
|
||||
"""Creates a Freeness module.
|
||||
|
||||
Args:
|
||||
memory_size: Number of memory slots.
|
||||
name: Name of the module.
|
||||
"""
|
||||
super(Freeness, self).__init__(name=name)
|
||||
self._memory_size = memory_size
|
||||
|
||||
def _build(self, write_weights, free_gate, read_weights, prev_usage):
|
||||
"""Calculates the new memory usage u_t.
|
||||
|
||||
Memory that was written to in the previous time step will have its usage
|
||||
increased; memory that was read from and the controller says can be "freed"
|
||||
will have its usage decreased.
|
||||
|
||||
Args:
|
||||
write_weights: tensor of shape `[batch_size, num_writes,
|
||||
memory_size]` giving write weights at previous time step.
|
||||
free_gate: tensor of shape `[batch_size, num_reads]` which indicates
|
||||
which read heads read memory that can now be freed.
|
||||
read_weights: tensor of shape `[batch_size, num_reads,
|
||||
memory_size]` giving read weights at previous time step.
|
||||
prev_usage: tensor of shape `[batch_size, memory_size]` giving
|
||||
usage u_{t - 1} at the previous time step, with entries in range
|
||||
[0, 1].
|
||||
|
||||
Returns:
|
||||
tensor of shape `[batch_size, memory_size]` representing updated memory
|
||||
usage.
|
||||
"""
|
||||
# Calculation of usage is not differentiable with respect to write weights.
|
||||
write_weights = tf.stop_gradient(write_weights)
|
||||
usage = self._usage_after_write(prev_usage, write_weights)
|
||||
usage = self._usage_after_read(usage, free_gate, read_weights)
|
||||
return usage
|
||||
|
||||
def write_allocation_weights(self, usage, write_gates, num_writes):
|
||||
"""Calculates freeness-based locations for writing to.
|
||||
|
||||
This finds unused memory by ranking the memory locations by usage, for each
|
||||
write head. (For more than one write head, we use a "simulated new usage"
|
||||
which takes into account the fact that the previous write head will increase
|
||||
the usage in that area of the memory.)
|
||||
|
||||
Args:
|
||||
usage: A tensor of shape `[batch_size, memory_size]` representing
|
||||
current memory usage.
|
||||
write_gates: A tensor of shape `[batch_size, num_writes]` with values in
|
||||
the range [0, 1] indicating how much each write head does writing
|
||||
based on the address returned here (and hence how much usage
|
||||
increases).
|
||||
num_writes: The number of write heads to calculate write weights for.
|
||||
|
||||
Returns:
|
||||
tensor of shape `[batch_size, num_writes, memory_size]` containing the
|
||||
freeness-based write locations. Note that this isn't scaled by
|
||||
`write_gate`; this scaling must be applied externally.
|
||||
"""
|
||||
with tf.name_scope('write_allocation_weights'):
|
||||
# expand gatings over memory locations
|
||||
write_gates = tf.expand_dims(write_gates, -1)
|
||||
|
||||
allocation_weights = []
|
||||
for i in range(num_writes):
|
||||
allocation_weights.append(self._allocation(usage))
|
||||
# update usage to take into account writing to this new allocation
|
||||
usage += ((1 - usage) * write_gates[:, i, :] * allocation_weights[i])
|
||||
|
||||
# Pack the allocation weights for the write heads into one tensor.
|
||||
return tf.stack(allocation_weights, axis=1)
|
||||
|
||||
def _usage_after_write(self, prev_usage, write_weights):
|
||||
"""Calcualtes the new usage after writing to memory.
|
||||
|
||||
Args:
|
||||
prev_usage: tensor of shape `[batch_size, memory_size]`.
|
||||
write_weights: tensor of shape `[batch_size, num_writes, memory_size]`.
|
||||
|
||||
Returns:
|
||||
New usage, a tensor of shape `[batch_size, memory_size]`.
|
||||
"""
|
||||
with tf.name_scope('usage_after_write'):
|
||||
# Calculate the aggregated effect of all write heads
|
||||
write_weights = 1 - tf.reduce_prod(1 - write_weights, [1])
|
||||
return prev_usage + (1 - prev_usage) * write_weights
|
||||
|
||||
def _usage_after_read(self, prev_usage, free_gate, read_weights):
|
||||
"""Calcualtes the new usage after reading and freeing from memory.
|
||||
|
||||
Args:
|
||||
prev_usage: tensor of shape `[batch_size, memory_size]`.
|
||||
free_gate: tensor of shape `[batch_size, num_reads]` with entries in the
|
||||
range [0, 1] indicating the amount that locations read from can be
|
||||
freed.
|
||||
read_weights: tensor of shape `[batch_size, num_reads, memory_size]`.
|
||||
|
||||
Returns:
|
||||
New usage, a tensor of shape `[batch_size, memory_size]`.
|
||||
"""
|
||||
with tf.name_scope('usage_after_read'):
|
||||
free_gate = tf.expand_dims(free_gate, -1)
|
||||
free_read_weights = free_gate * read_weights
|
||||
phi = tf.reduce_prod(1 - free_read_weights, [1], name='phi')
|
||||
return prev_usage * phi
|
||||
|
||||
def _allocation(self, usage):
  r"""Computes allocation by sorting `usage`.

  This corresponds to the value a = a_t[\phi_t[j]] in the paper.

  Args:
    usage: tensor of shape `[batch_size, memory_size]` indicating current
      memory usage. This is equal to u_t in the paper when we only have one
      write head, but for multiple write heads, one should update the usage
      while iterating through the write heads to take into account the
      allocation returned by this function.

  Returns:
    Tensor of shape `[batch_size, memory_size]` corresponding to allocation.
  """
  with tf.name_scope('allocation'):
    # Ensure values are not too small prior to cumprod.
    bounded_usage = _EPSILON + (1 - _EPSILON) * usage

    free = 1 - bounded_usage
    # top_k over every location (k == memory_size) yields the free amounts in
    # descending order, i.e. the locations ordered from least to most used.
    free_desc, order = tf.nn.top_k(free, k=self._memory_size, name='sort')
    used_asc = 1 - free_desc
    # Exclusive cumulative product: for each sorted position, the product of
    # the usages of all positions that come before it.
    used_before = tf.cumprod(used_asc, axis=1, exclusive=True)
    allocation_sorted = free_desc * used_before
    restore_order = util.batch_invert_permutation(order)

    # Undo the sort so that the result is indexed like the original `usage`.
    return util.batch_gather(allocation_sorted, restore_order)
|
||||
|
||||
@property
def state_size(self):
  """Returns a `tf.TensorShape` holding the single dimension `memory_size`."""
  dims = [self._memory_size]
  return tf.TensorShape(dims)
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
def reducedimension(input_, dimension=2, learning_rate=0.01, hidden_layer=256, epoch=20):
    """Reduce `input_` to `dimension` features with a small sigmoid autoencoder.

    Builds a 2-layer encoder / 2-layer decoder in the default TensorFlow
    graph, trains it full-batch on `input_` to minimise the mean squared
    reconstruction error, and returns the bottleneck activations.

    Args:
        input_: 2-D array-like with a `.shape` attribute, laid out as
            `[n_samples, n_features]`; it is fed whole to the placeholder on
            every training step.
        dimension: width of the bottleneck layer (number of output features).
        learning_rate: learning rate passed to `tf.train.RMSPropOptimizer`.
        hidden_layer: width of the hidden layer on both encoder and decoder.
        epoch: number of full-batch optimisation steps.

    Returns:
        The encoder output evaluated on `input_`: an array of shape
        `[n_samples, dimension]` with values in (0, 1) (sigmoid activations).

    Side effects:
        Prints the loss every 10th epoch and calls `tf.reset_default_graph()`
        before returning, so any other ops the caller had placed in the
        default graph are discarded.
    """
    input_size = input_.shape[1]
    X = tf.placeholder("float", [None, input_size])

    weights = {
        'encoder_h1': tf.Variable(tf.random_normal([input_size, hidden_layer])),
        'encoder_h2': tf.Variable(tf.random_normal([hidden_layer, dimension])),
        'decoder_h1': tf.Variable(tf.random_normal([dimension, hidden_layer])),
        'decoder_h2': tf.Variable(tf.random_normal([hidden_layer, input_size])),
    }

    biases = {
        'encoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'encoder_b2': tf.Variable(tf.random_normal([dimension])),
        'decoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'decoder_b2': tf.Variable(tf.random_normal([input_size])),
    }

    first_layer_encoder = tf.nn.sigmoid(tf.add(tf.matmul(X, weights['encoder_h1']), biases['encoder_b1']))
    second_layer_encoder = tf.nn.sigmoid(tf.add(tf.matmul(first_layer_encoder, weights['encoder_h2']), biases['encoder_b2']))
    first_layer_decoder = tf.nn.sigmoid(tf.add(tf.matmul(second_layer_encoder, weights['decoder_h1']), biases['decoder_b1']))
    # NOTE(review): the reconstruction also ends in a sigmoid, so targets
    # outside [0, 1] cannot be matched -- presumably callers min-max scale
    # `input_` beforehand; confirm against the call sites.
    second_layer_decoder = tf.nn.sigmoid(tf.add(tf.matmul(first_layer_decoder, weights['decoder_h2']), biases['decoder_b2']))
    cost = tf.reduce_mean(tf.pow(X - second_layer_decoder, 2))
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

    # A context-managed Session is closed on exit (also on error). The former
    # InteractiveSession was never closed, so every call leaked a session and
    # left it installed as the default session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(epoch):
            last_time = time.time()
            _, loss = sess.run([optimizer, cost], feed_dict={X: input_})
            if (i + 1) % 10 == 0:
                print('epoch:', i + 1, 'loss:', loss, 'time:', time.time() - last_time)

        vectors = sess.run(second_layer_encoder, feed_dict={X: input_})

    # Reset only after the session is closed: resetting the default graph
    # while a session is still active is documented as undefined behaviour.
    tf.reset_default_graph()
    return vectors
|
||||
+142
@@ -0,0 +1,142 @@
|
||||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""DNC Cores.
|
||||
|
||||
These modules create a DNC core. They take input, pass parameters to the memory
|
||||
access module, and integrate the output of memory to form an output.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import numpy as np
|
||||
import sonnet as snt
|
||||
import tensorflow as tf
|
||||
|
||||
import access
|
||||
|
||||
# Recurrent state carried between DNC steps: the previous output of the memory
# access module, that module's own state, and the controller's state.
DNCState = collections.namedtuple(
    'DNCState',
    ['access_output', 'access_state', 'controller_state'])
|
||||
|
||||
|
||||
class DNC(snt.RNNCore):
  """DNC core module.

  Contains controller and memory access module. At every step the controller
  receives the current input concatenated with the previous access output,
  and the core output is a linear projection of the controller output
  concatenated with the new access output.
  """

  def __init__(self,
               access_config,
               controller_config,
               output_size,
               clip_value=None,
               name='dnc'):
    """Initializes the DNC core.

    Args:
      access_config: dictionary of access module configurations; expanded as
        keyword arguments to `access.MemoryAccess`.
      controller_config: dictionary of controller (LSTM) module
        configurations; expanded as keyword arguments to `snt.LSTM`.
      output_size: output dimension size of core.
      clip_value: clips the controller output, the controller state and the
        core output to `[-clip_value, clip_value]` if specified. `None` (or
        any other falsy value) disables clipping.
      name: module name (default 'dnc').
    """
    super(DNC, self).__init__(name=name)

    with self._enter_variable_scope():
      self._controller = snt.LSTM(**controller_config)
      self._access = access.MemoryAccess(**access_config)

    # Number of elements in one (flattened) access output.
    self._access_output_size = np.prod(self._access.output_size.as_list())
    # 0 acts as the "clipping disabled" sentinel tested in `_clip_if_enabled`.
    self._clip_value = clip_value or 0

    self._output_size = tf.TensorShape([output_size])
    self._state_size = DNCState(
        access_output=self._access_output_size,
        access_state=self._access.state_size,
        controller_state=self._controller.state_size)

  def _clip_if_enabled(self, x):
    """Clips `x` to `[-clip_value, clip_value]`, or returns it as-is if off."""
    if self._clip_value > 0:
      return tf.clip_by_value(x, -self._clip_value, self._clip_value)
    else:
      return x

  def _build(self, inputs, prev_state):
    """Connects the DNC core into the graph.

    Args:
      inputs: Tensor input.
      prev_state: A `DNCState` tuple containing the fields `access_output`,
        `access_state` and `controller_state`. `access_output` is a 3-D Tensor
        of shape `[batch_size, num_reads, word_size]` containing read words.
        `access_state` is a tuple of the access module's state, and
        `controller_state` is a tuple of controller module's state.

    Returns:
      A tuple `(output, next_state)` where `output` is a tensor and `next_state`
      is a `DNCState` tuple containing the fields `access_output`,
      `access_state`, and `controller_state`.
    """

    prev_access_output = prev_state.access_output
    prev_access_state = prev_state.access_state
    prev_controller_state = prev_state.controller_state

    # The controller sees the new input alongside what was read last step.
    batch_flatten = snt.BatchFlatten()
    controller_input = tf.concat(
        [batch_flatten(inputs), batch_flatten(prev_access_output)], 1)

    controller_output, controller_state = self._controller(
        controller_input, prev_controller_state)

    # Clip both the output and every tensor of the (nested) controller state.
    controller_output = self._clip_if_enabled(controller_output)
    controller_state = snt.nest.map(self._clip_if_enabled, controller_state)

    access_output, access_state = self._access(controller_output,
                                               prev_access_state)

    # Project controller output + fresh memory reads down to `output_size`.
    output = tf.concat([controller_output, batch_flatten(access_output)], 1)
    output = snt.Linear(
        output_size=self._output_size.as_list()[0],
        name='output_linear')(output)
    output = self._clip_if_enabled(output)

    return output, DNCState(
        access_output=access_output,
        access_state=access_state,
        controller_state=controller_state)

  def initial_state(self, batch_size, dtype=tf.float32):
    """Returns the initial `DNCState` for a batch of size `batch_size`.

    The controller and access states come from the respective sub-modules'
    `initial_state`; the initial access output is all zeros with shape
    `[batch_size] + access.output_size`.
    """
    return DNCState(
        controller_state=self._controller.initial_state(batch_size, dtype),
        access_state=self._access.initial_state(batch_size, dtype),
        access_output=tf.zeros(
            [batch_size] + self._access.output_size.as_list(), dtype))

  @property
  def state_size(self):
    """`DNCState` of sizes for `access_output`, `access_state`, `controller_state`."""
    return self._state_size

  @property
  def output_size(self):
    """`tf.TensorShape([output_size])` of the core output."""
    return self._output_size
|
||||
+739
File diff suppressed because one or more lines are too long
+746
File diff suppressed because one or more lines are too long
+45
@@ -0,0 +1,45 @@
|
||||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""DNC util ops and modules."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def batch_invert_permutation(permutations):
  """Applies `tf.invert_permutation` to each row of `permutations`.

  The input is split along its first axis with `tf.unstack`, each slice is
  inverted independently, and the results are stacked back along axis 0.
  """
  with tf.name_scope('batch_invert_permutation', values=[permutations]):
    inverted_rows = []
    for row in tf.unstack(permutations):
      inverted_rows.append(tf.invert_permutation(row))
    return tf.stack(inverted_rows)
|
||||
|
||||
|
||||
def batch_gather(values, indices):
  """Applies `tf.gather` row by row: row i of `values` with row i of `indices`.

  Both inputs are split along their first axis with `tf.unstack`, paired up
  in order, gathered, and the per-row results are stacked back along axis 0.
  """
  with tf.name_scope('batch_gather', values=[values, indices]):
    value_rows = tf.unstack(values)
    index_rows = tf.unstack(indices)
    gathered = []
    for row, picks in zip(value_rows, index_rows):
      gathered.append(tf.gather(row, picks))
    return tf.stack(gathered)
|
||||
|
||||
|
||||
def one_hot(length, index):
  """Builds a zero-filled numpy array of size `length` with a 1 at `index`.

  Args:
    length: size passed to `np.zeros`.
    index: position (anything numpy accepts as an index) that is set to 1.

  Returns:
    The resulting float array.
  """
  vector = np.zeros(length)
  vector[index] = 1
  return vector
|
||||
Reference in New Issue
Block a user