chore: add Stock-Prediction-Models project files
Adds multiple files from the Stock-Prediction-Models project, including datasets, model code, README documentation, and CSS stylesheets. These files are used for training and presenting stock prediction models, covering applications of deep learning models such as LSTM and GRU.
File diff suppressed for 18 files because one or more lines are too long
@@ -0,0 +1,318 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DNC access modules."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import sonnet as snt
import tensorflow as tf

import addressing
import util

AccessState = collections.namedtuple('AccessState', (
    'memory', 'read_weights', 'write_weights', 'linkage', 'usage'))


def _erase_and_write(memory, address, reset_weights, values):
  """Module to erase and write in the external memory.

  Erase operation:
    M_t'(i) = M_{t-1}(i) * (1 - w_t(i) * e_t)

  Add operation:
    M_t(i) = M_t'(i) + w_t(i) * a_t

  where e are the reset_weights, w the write weights and a the values.

  Args:
    memory: 3-D tensor of shape `[batch_size, memory_size, word_size]`.
    address: 3-D tensor `[batch_size, num_writes, memory_size]`.
    reset_weights: 3-D tensor `[batch_size, num_writes, word_size]`.
    values: 3-D tensor `[batch_size, num_writes, word_size]`.

  Returns:
    3-D tensor of shape `[batch_size, num_writes, word_size]`.
  """
  with tf.name_scope('erase_memory', values=[memory, address, reset_weights]):
    expand_address = tf.expand_dims(address, 3)
    reset_weights = tf.expand_dims(reset_weights, 2)
    weighted_resets = expand_address * reset_weights
    reset_gate = tf.reduce_prod(1 - weighted_resets, [1])
    memory *= reset_gate

  with tf.name_scope('additive_write', values=[memory, address, values]):
    add_matrix = tf.matmul(address, values, adjoint_a=True)
    memory += add_matrix

  return memory


class MemoryAccess(snt.RNNCore):
  """Access module of the Differentiable Neural Computer.

  This memory module supports multiple read and write heads. It makes use of:

  * `addressing.TemporalLinkage` to track the temporal ordering of writes in
    memory for each write head.
  * `addressing.FreenessAllocator` for keeping track of memory usage, where
    usage increases when a memory location is written to, and decreases when
    memory is read from a location that the controller says can be freed.

  Write-address selection is done by an interpolation between content-based
  lookup and using unused memory.

  Read-address selection is done by an interpolation of content-based lookup
  and following the link graph in the forward or backwards read direction.
  """

  def __init__(self,
               memory_size=128,
               word_size=20,
               num_reads=1,
               num_writes=1,
               name='memory_access'):
    """Creates a MemoryAccess module.

    Args:
      memory_size: The number of memory slots (N in the DNC paper).
      word_size: The width of each memory slot (W in the DNC paper).
      num_reads: The number of read heads (R in the DNC paper).
      num_writes: The number of write heads (fixed at 1 in the paper).
      name: The name of the module.
    """
    super(MemoryAccess, self).__init__(name=name)
    self._memory_size = memory_size
    self._word_size = word_size
    self._num_reads = num_reads
    self._num_writes = num_writes

    self._write_content_weights_mod = addressing.CosineWeights(
        num_writes, word_size, name='write_content_weights')
    self._read_content_weights_mod = addressing.CosineWeights(
        num_reads, word_size, name='read_content_weights')

    self._linkage = addressing.TemporalLinkage(memory_size, num_writes)
    self._freeness = addressing.Freeness(memory_size)

  def _build(self, inputs, prev_state):
    """Connects the MemoryAccess module into the graph.

    Args:
      inputs: tensor of shape `[batch_size, input_size]`. This is used to
          control this access module.
      prev_state: Instance of `AccessState` containing the previous state.

    Returns:
      A tuple `(output, next_state)`, where `output` is a tensor of shape
      `[batch_size, num_reads, word_size]`, and `next_state` is the new
      `AccessState` named tuple at the current time t.
    """
    inputs = self._read_inputs(inputs)

    # Update usage using inputs['free_gate'] and previous read & write weights.
    usage = self._freeness(
        write_weights=prev_state.write_weights,
        free_gate=inputs['free_gate'],
        read_weights=prev_state.read_weights,
        prev_usage=prev_state.usage)

    # Write to memory.
    write_weights = self._write_weights(inputs, prev_state.memory, usage)
    memory = _erase_and_write(
        prev_state.memory,
        address=write_weights,
        reset_weights=inputs['erase_vectors'],
        values=inputs['write_vectors'])

    linkage_state = self._linkage(write_weights, prev_state.linkage)

    # Read from memory.
    read_weights = self._read_weights(
        inputs,
        memory=memory,
        prev_read_weights=prev_state.read_weights,
        link=linkage_state.link)
    read_words = tf.matmul(read_weights, memory)

    return (read_words, AccessState(
        memory=memory,
        read_weights=read_weights,
        write_weights=write_weights,
        linkage=linkage_state,
        usage=usage))

  def _read_inputs(self, inputs):
    """Applies transformations to `inputs` to get control for this module."""

    def _linear(first_dim, second_dim, name, activation=None):
      """Returns a linear transformation of `inputs`, followed by a reshape."""
      linear = snt.Linear(first_dim * second_dim, name=name)(inputs)
      if activation is not None:
        linear = activation(linear, name=name + '_activation')
      return tf.reshape(linear, [-1, first_dim, second_dim])

    # v_t^i - The vectors to write to memory, for each write head `i`.
    write_vectors = _linear(self._num_writes, self._word_size, 'write_vectors')

    # e_t^i - Amount to erase the memory by before writing, for each write head.
    erase_vectors = _linear(self._num_writes, self._word_size, 'erase_vectors',
                            tf.sigmoid)

    # f_t^j - Amount that the memory at the locations read from at the previous
    # time step can be declared unused, for each read head `j`.
    free_gate = tf.sigmoid(
        snt.Linear(self._num_reads, name='free_gate')(inputs))

    # g_t^{a, i} - Interpolation between writing to unallocated memory and
    # content-based lookup, for each write head `i`. Note: `a` is simply used to
    # identify this gate with allocation vs writing (as defined below).
    allocation_gate = tf.sigmoid(
        snt.Linear(self._num_writes, name='allocation_gate')(inputs))

    # g_t^{w, i} - Overall gating of write amount for each write head.
    write_gate = tf.sigmoid(
        snt.Linear(self._num_writes, name='write_gate')(inputs))

    # \pi_t^j - Mixing between "backwards" and "forwards" positions (for
    # each write head), and content-based lookup, for each read head.
    num_read_modes = 1 + 2 * self._num_writes
    read_mode = snt.BatchApply(tf.nn.softmax)(
        _linear(self._num_reads, num_read_modes, name='read_mode'))

    # Parameters for the (read / write) "weights by content matching" modules.
    write_keys = _linear(self._num_writes, self._word_size, 'write_keys')
    write_strengths = snt.Linear(self._num_writes, name='write_strengths')(
        inputs)

    read_keys = _linear(self._num_reads, self._word_size, 'read_keys')
    read_strengths = snt.Linear(self._num_reads, name='read_strengths')(inputs)

    result = {
        'read_content_keys': read_keys,
        'read_content_strengths': read_strengths,
        'write_content_keys': write_keys,
        'write_content_strengths': write_strengths,
        'write_vectors': write_vectors,
        'erase_vectors': erase_vectors,
        'free_gate': free_gate,
        'allocation_gate': allocation_gate,
        'write_gate': write_gate,
        'read_mode': read_mode,
    }
    return result

  def _write_weights(self, inputs, memory, usage):
    """Calculates the memory locations to write to.

    This uses a combination of content-based lookup and finding an unused
    location in memory, for each write head.

    Args:
      inputs: Collection of inputs to the access module, including controls for
          how to choose memory writing, such as the content to look up and the
          weighting between content-based and allocation-based addressing.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents.
      usage: Current memory usage, which is a tensor of shape `[batch_size,
          memory_size]`, used for allocation-based addressing.

    Returns:
      tensor of shape `[batch_size, num_writes, memory_size]` indicating where
      to write to (if anywhere) for each write head.
    """
    with tf.name_scope('write_weights', values=[inputs, memory, usage]):
      # c_t^{w, i} - The content-based weights for each write head.
      write_content_weights = self._write_content_weights_mod(
          memory, inputs['write_content_keys'],
          inputs['write_content_strengths'])

      # a_t^i - The allocation weights for each write head.
      write_allocation_weights = self._freeness.write_allocation_weights(
          usage=usage,
          write_gates=(inputs['allocation_gate'] * inputs['write_gate']),
          num_writes=self._num_writes)

      # Expands gates over memory locations.
      allocation_gate = tf.expand_dims(inputs['allocation_gate'], -1)
      write_gate = tf.expand_dims(inputs['write_gate'], -1)

      # w_t^{w, i} - The write weightings for each write head.
      return write_gate * (allocation_gate * write_allocation_weights +
                           (1 - allocation_gate) * write_content_weights)

  def _read_weights(self, inputs, memory, prev_read_weights, link):
    """Calculates read weights for each read head.

    The read weights are a combination of following the link graphs in the
    forward or backward directions from the previous read position, and doing
    content-based lookup. The interpolation between these different modes is
    done by `inputs['read_mode']`.

    Args:
      inputs: Controls for this access module. This contains the content-based
          keys to look up, and the weightings for the different read modes.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents to do content-based lookup.
      prev_read_weights: A tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read locations.
      link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` containing the temporal write transition graphs.

    Returns:
      A tensor of shape `[batch_size, num_reads, memory_size]` containing the
      read weights for each read head.
    """
    with tf.name_scope(
        'read_weights', values=[inputs, memory, prev_read_weights, link]):
      # c_t^{r, i} - The content weightings for each read head.
      content_weights = self._read_content_weights_mod(
          memory, inputs['read_content_keys'], inputs['read_content_strengths'])

      # Calculates f_t^i and b_t^i.
      forward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=True)
      backward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=False)

      backward_mode = inputs['read_mode'][:, :, :self._num_writes]
      forward_mode = (
          inputs['read_mode'][:, :, self._num_writes:2 * self._num_writes])
      content_mode = inputs['read_mode'][:, :, 2 * self._num_writes]

      read_weights = (
          tf.expand_dims(content_mode, 2) * content_weights + tf.reduce_sum(
              tf.expand_dims(forward_mode, 3) * forward_weights, 2) +
          tf.reduce_sum(tf.expand_dims(backward_mode, 3) * backward_weights, 2))

      return read_weights

  @property
  def state_size(self):
    """Returns a tuple of the shape of the state tensors."""
    return AccessState(
        memory=tf.TensorShape([self._memory_size, self._word_size]),
        read_weights=tf.TensorShape([self._num_reads, self._memory_size]),
        write_weights=tf.TensorShape([self._num_writes, self._memory_size]),
        linkage=self._linkage.state_size,
        usage=self._freeness.state_size)

  @property
  def output_size(self):
    """Returns the output shape."""
    return tf.TensorShape([self._num_reads, self._word_size])
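The erase/add equations in the `_erase_and_write` docstring above can be checked with a few lines of NumPy. This is a minimal illustrative sketch, not part of the committed file; shapes and symbol names follow the docstring.

import numpy as np

batch, num_writes, memory_size, word_size = 1, 1, 4, 3
memory = np.ones((batch, memory_size, word_size))              # M_{t-1}
address = np.zeros((batch, num_writes, memory_size))           # w_t, one-hot at slot 2
address[0, 0, 2] = 1.0
reset_weights = np.full((batch, num_writes, word_size), 0.5)   # e_t
values = np.full((batch, num_writes, word_size), 7.0)          # a_t

# Erase: M'(i) = M_{t-1}(i) * (1 - w(i) * e), multiplied over write heads.
weighted_resets = address[:, :, :, None] * reset_weights[:, :, None, :]
memory = memory * np.prod(1 - weighted_resets, axis=1)

# Add: M(i) = M'(i) + w(i) * a, an outer product accumulated over write heads.
memory = memory + np.matmul(address.transpose(0, 2, 1), values)

print(memory[0])   # slot 2 becomes 1 * 0.5 + 7 = 7.5; the other slots stay at 1.0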
@@ -0,0 +1,410 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DNC addressing modules."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import sonnet as snt
import tensorflow as tf

import util

# Ensure values are greater than epsilon to avoid numerical instability.
_EPSILON = 1e-6

TemporalLinkageState = collections.namedtuple('TemporalLinkageState',
                                              ('link', 'precedence_weights'))


def _vector_norms(m):
  squared_norms = tf.reduce_sum(m * m, axis=2, keep_dims=True)
  return tf.sqrt(squared_norms + _EPSILON)


def weighted_softmax(activations, strengths, strengths_op):
  """Returns softmax over activations multiplied by positive strengths.

  Args:
    activations: A tensor of shape `[batch_size, num_heads, memory_size]`, of
        activations to be transformed. Softmax is taken over the last dimension.
    strengths: A tensor of shape `[batch_size, num_heads]` containing strengths
        to multiply by the activations prior to the softmax.
    strengths_op: An operation to transform strengths before softmax.

  Returns:
    A tensor of same shape as `activations` with weighted softmax applied.
  """
  transformed_strengths = tf.expand_dims(strengths_op(strengths), -1)
  sharp_activations = activations * transformed_strengths
  softmax = snt.BatchApply(module_or_op=tf.nn.softmax)
  return softmax(sharp_activations)


class CosineWeights(snt.AbstractModule):
  """Cosine-weighted attention.

  Calculates the cosine similarity between a query and each word in memory,
  then applies a weighted softmax to return a sharp distribution.
  """

  def __init__(self,
               num_heads,
               word_size,
               strength_op=tf.nn.softplus,
               name='cosine_weights'):
    """Initializes the CosineWeights module.

    Args:
      num_heads: number of memory heads.
      word_size: memory word size.
      strength_op: operation to apply to strengths (default is tf.nn.softplus).
      name: module name (default 'cosine_weights').
    """
    super(CosineWeights, self).__init__(name=name)
    self._num_heads = num_heads
    self._word_size = word_size
    self._strength_op = strength_op

  def _build(self, memory, keys, strengths):
    """Connects the CosineWeights module into the graph.

    Args:
      memory: A 3-D tensor of shape `[batch_size, memory_size, word_size]`.
      keys: A 3-D tensor of shape `[batch_size, num_heads, word_size]`.
      strengths: A 2-D tensor of shape `[batch_size, num_heads]`.

    Returns:
      Weights tensor of shape `[batch_size, num_heads, memory_size]`.
    """
    # Calculates the inner product between the query vector and words in memory.
    dot = tf.matmul(keys, memory, adjoint_b=True)

    # Outer product to compute denominator (euclidean norm of query and memory).
    memory_norms = _vector_norms(memory)
    key_norms = _vector_norms(keys)
    norm = tf.matmul(key_norms, memory_norms, adjoint_b=True)

    # Calculates cosine similarity between the query vector and words in memory.
    similarity = dot / (norm + _EPSILON)

    return weighted_softmax(similarity, strengths, self._strength_op)


class TemporalLinkage(snt.RNNCore):
  """Keeps track of write order for forward and backward addressing.

  This is a pseudo-RNNCore module, whose state is a pair `(link,
  precedence_weights)`, where `link` is a (collection of) graphs for (possibly
  multiple) write heads (represented by a tensor with values in the range
  [0, 1]), and `precedence_weights` records the "previous write locations" used
  to build the link graphs.

  The function `directional_read_weights` computes addresses following the
  forward and backward directions in the link graphs.
  """

  def __init__(self, memory_size, num_writes, name='temporal_linkage'):
    """Construct a TemporalLinkage module.

    Args:
      memory_size: The number of memory slots.
      num_writes: The number of write heads.
      name: Name of the module.
    """
    super(TemporalLinkage, self).__init__(name=name)
    self._memory_size = memory_size
    self._num_writes = num_writes

  def _build(self, write_weights, prev_state):
    """Calculate the updated linkage state given the write weights.

    Args:
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the memory addresses of the different write heads.
      prev_state: `TemporalLinkageState` tuple containing a tensor `link` of
          shape `[batch_size, num_writes, memory_size, memory_size]`, and a
          tensor `precedence_weights` of shape `[batch_size, num_writes,
          memory_size]` containing the aggregated history of recent writes.

    Returns:
      A `TemporalLinkageState` tuple `next_state`, which contains the updated
      link and precedence weights.
    """
    link = self._link(prev_state.link, prev_state.precedence_weights,
                      write_weights)
    precedence_weights = self._precedence_weights(prev_state.precedence_weights,
                                                  write_weights)
    return TemporalLinkageState(
        link=link, precedence_weights=precedence_weights)

  def directional_read_weights(self, link, prev_read_weights, forward):
    """Calculates the forward or the backward read weights.

    For each read head (at a given address), there are `num_writes` link graphs
    to follow. Thus this function computes a read address for each of the
    `num_reads * num_writes` pairs of read and write heads.

    Args:
      link: tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` representing the link graphs L_t.
      prev_read_weights: tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read weights w_{t-1}^r.
      forward: Boolean indicating whether to follow the "future" direction in
          the link graph (True) or the "past" direction (False).

    Returns:
      tensor of shape `[batch_size, num_reads, num_writes, memory_size]`
    """
    with tf.name_scope('directional_read_weights'):
      # We calculate the forward and backward directions for each pair of
      # read and write heads; hence we need to tile the read weights and do a
      # sort of "outer product" to get this.
      expanded_read_weights = tf.stack([prev_read_weights] * self._num_writes,
                                       1)
      result = tf.matmul(expanded_read_weights, link, adjoint_b=forward)
      # Swap dimensions 1, 2 so order is [batch, reads, writes, memory]:
      return tf.transpose(result, perm=[0, 2, 1, 3])

  def _link(self, prev_link, prev_precedence_weights, write_weights):
    """Calculates the new link graphs.

    For each write head, the link is a directed graph (represented by a matrix
    with entries in the range [0, 1]) whose vertices are the memory locations,
    and an edge indicates temporal ordering of writes.

    Args:
      prev_link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` representing the previous link graphs for each write
          head.
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` which is the previous "aggregated" write weights for
          each write head.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new locations in memory written to.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size, memory_size]`
      containing the new link graphs for each write head.
    """
    with tf.name_scope('link'):
      batch_size = prev_link.get_shape()[0].value
      write_weights_i = tf.expand_dims(write_weights, 3)
      write_weights_j = tf.expand_dims(write_weights, 2)
      prev_precedence_weights_j = tf.expand_dims(prev_precedence_weights, 2)
      prev_link_scale = 1 - write_weights_i - write_weights_j
      new_link = write_weights_i * prev_precedence_weights_j
      link = prev_link_scale * prev_link + new_link
      # Return the link with the diagonal set to zero, to remove self-looping
      # edges.
      return tf.matrix_set_diag(
          link,
          tf.zeros(
              [batch_size, self._num_writes, self._memory_size],
              dtype=link.dtype))

  def _precedence_weights(self, prev_precedence_weights, write_weights):
    """Calculates the new precedence weights given the current write weights.

    The precedence weights are the "aggregated write weights" for each write
    head, where write weights with sum close to zero will leave the precedence
    weights unchanged, but with sum close to one will replace the precedence
    weights.

    Args:
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` containing the previous precedence weights.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new write weights.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size]` containing the
      new precedence weights.
    """
    with tf.name_scope('precedence_weights'):
      write_sum = tf.reduce_sum(write_weights, 2, keep_dims=True)
      return (1 - write_sum) * prev_precedence_weights + write_weights

  @property
  def state_size(self):
    """Returns a `TemporalLinkageState` tuple of the state tensors' shapes."""
    return TemporalLinkageState(
        link=tf.TensorShape(
            [self._num_writes, self._memory_size, self._memory_size]),
        precedence_weights=tf.TensorShape([self._num_writes,
                                           self._memory_size]),)


class Freeness(snt.RNNCore):
  """Memory usage that is increased by writing and decreased by reading.

  This module is a pseudo-RNNCore whose state is a tensor with values in
  the range [0, 1] indicating the usage of each of `memory_size` memory slots.

  The usage is:

  * Increased by writing, where usage is increased towards 1 at the write
    addresses.
  * Decreased by reading, where usage is decreased after reading from a
    location when free_gate is close to 1.

  The function `write_allocation_weights` can be invoked to get free locations
  to write to for a number of write heads.
  """

  def __init__(self, memory_size, name='freeness'):
    """Creates a Freeness module.

    Args:
      memory_size: Number of memory slots.
      name: Name of the module.
    """
    super(Freeness, self).__init__(name=name)
    self._memory_size = memory_size

  def _build(self, write_weights, free_gate, read_weights, prev_usage):
    """Calculates the new memory usage u_t.

    Memory that was written to in the previous time step will have its usage
    increased; memory that was read from and the controller says can be "freed"
    will have its usage decreased.

    Args:
      write_weights: tensor of shape `[batch_size, num_writes,
          memory_size]` giving write weights at previous time step.
      free_gate: tensor of shape `[batch_size, num_reads]` which indicates
          which read heads read memory that can now be freed.
      read_weights: tensor of shape `[batch_size, num_reads,
          memory_size]` giving read weights at previous time step.
      prev_usage: tensor of shape `[batch_size, memory_size]` giving
          usage u_{t - 1} at the previous time step, with entries in range
          [0, 1].

    Returns:
      tensor of shape `[batch_size, memory_size]` representing updated memory
      usage.
    """
    # Calculation of usage is not differentiable with respect to write weights.
    write_weights = tf.stop_gradient(write_weights)
    usage = self._usage_after_write(prev_usage, write_weights)
    usage = self._usage_after_read(usage, free_gate, read_weights)
    return usage

  def write_allocation_weights(self, usage, write_gates, num_writes):
    """Calculates freeness-based locations for writing to.

    This finds unused memory by ranking the memory locations by usage, for each
    write head. (For more than one write head, we use a "simulated new usage"
    which takes into account the fact that the previous write head will increase
    the usage in that area of the memory.)

    Args:
      usage: A tensor of shape `[batch_size, memory_size]` representing
          current memory usage.
      write_gates: A tensor of shape `[batch_size, num_writes]` with values in
          the range [0, 1] indicating how much each write head does writing
          based on the address returned here (and hence how much usage
          increases).
      num_writes: The number of write heads to calculate write weights for.

    Returns:
      tensor of shape `[batch_size, num_writes, memory_size]` containing the
      freeness-based write locations. Note that this isn't scaled by
      `write_gate`; this scaling must be applied externally.
    """
    with tf.name_scope('write_allocation_weights'):
      # expand gatings over memory locations
      write_gates = tf.expand_dims(write_gates, -1)

      allocation_weights = []
      for i in range(num_writes):
        allocation_weights.append(self._allocation(usage))
        # update usage to take into account writing to this new allocation
        usage += ((1 - usage) * write_gates[:, i, :] * allocation_weights[i])

      # Pack the allocation weights for the write heads into one tensor.
      return tf.stack(allocation_weights, axis=1)

  def _usage_after_write(self, prev_usage, write_weights):
    """Calculates the new usage after writing to memory.

    Args:
      prev_usage: tensor of shape `[batch_size, memory_size]`.
      write_weights: tensor of shape `[batch_size, num_writes, memory_size]`.

    Returns:
      New usage, a tensor of shape `[batch_size, memory_size]`.
    """
    with tf.name_scope('usage_after_write'):
      # Calculate the aggregated effect of all write heads
      write_weights = 1 - tf.reduce_prod(1 - write_weights, [1])
      return prev_usage + (1 - prev_usage) * write_weights

  def _usage_after_read(self, prev_usage, free_gate, read_weights):
    """Calculates the new usage after reading and freeing from memory.

    Args:
      prev_usage: tensor of shape `[batch_size, memory_size]`.
      free_gate: tensor of shape `[batch_size, num_reads]` with entries in the
          range [0, 1] indicating the amount that locations read from can be
          freed.
      read_weights: tensor of shape `[batch_size, num_reads, memory_size]`.

    Returns:
      New usage, a tensor of shape `[batch_size, memory_size]`.
    """
    with tf.name_scope('usage_after_read'):
      free_gate = tf.expand_dims(free_gate, -1)
      free_read_weights = free_gate * read_weights
      phi = tf.reduce_prod(1 - free_read_weights, [1], name='phi')
      return prev_usage * phi

  def _allocation(self, usage):
    r"""Computes allocation by sorting `usage`.

    This corresponds to the value a = a_t[\phi_t[j]] in the paper.

    Args:
      usage: tensor of shape `[batch_size, memory_size]` indicating current
          memory usage. This is equal to u_t in the paper when we only have one
          write head, but for multiple write heads, one should update the usage
          while iterating through the write heads to take into account the
          allocation returned by this function.

    Returns:
      Tensor of shape `[batch_size, memory_size]` corresponding to allocation.
    """
    with tf.name_scope('allocation'):
      # Ensure values are not too small prior to cumprod.
      usage = _EPSILON + (1 - _EPSILON) * usage

      nonusage = 1 - usage
      sorted_nonusage, indices = tf.nn.top_k(
          nonusage, k=self._memory_size, name='sort')
      sorted_usage = 1 - sorted_nonusage
      prod_sorted_usage = tf.cumprod(sorted_usage, axis=1, exclusive=True)
      sorted_allocation = sorted_nonusage * prod_sorted_usage
      inverse_indices = util.batch_invert_permutation(indices)

      # This final line "unsorts" sorted_allocation, so that the indexing
      # corresponds to the original indexing of `usage`.
      return util.batch_gather(sorted_allocation, inverse_indices)

  @property
  def state_size(self):
    """Returns the shape of the state tensor."""
    return tf.TensorShape([self._memory_size])
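For intuition about `Freeness._allocation` above, here is a minimal NumPy sketch of the same sort-and-unsort allocation rule for a single batch element (illustrative only, not part of the committed file).

import numpy as np

usage = np.array([0.9, 0.1, 0.5, 0.0])      # usage of four memory slots
order = np.argsort(usage)                    # ascending usage == descending non-usage
sorted_usage = usage[order]
# exclusive cumulative product of sorted usage (1 for the first element)
prod_sorted = np.concatenate(([1.0], np.cumprod(sorted_usage)[:-1]))
sorted_allocation = (1 - sorted_usage) * prod_sorted
allocation = np.empty_like(sorted_allocation)
allocation[order] = sorted_allocation        # "unsort" back to the original slot order
print(allocation)   # the least-used slot (index 3) receives the largest allocation weight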
@@ -0,0 +1,41 @@
import tensorflow as tf
import numpy as np
import time


def reducedimension(input_, dimension=2, learning_rate=0.01, hidden_layer=256, epoch=20):
    """Compresses `input_` to `dimension` columns with a two-layer sigmoid autoencoder."""

    input_size = input_.shape[1]
    X = tf.placeholder("float", [None, input_size])

    weights = {
        'encoder_h1': tf.Variable(tf.random_normal([input_size, hidden_layer])),
        'encoder_h2': tf.Variable(tf.random_normal([hidden_layer, dimension])),
        'decoder_h1': tf.Variable(tf.random_normal([dimension, hidden_layer])),
        'decoder_h2': tf.Variable(tf.random_normal([hidden_layer, input_size])),
    }

    biases = {
        'encoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'encoder_b2': tf.Variable(tf.random_normal([dimension])),
        'decoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'decoder_b2': tf.Variable(tf.random_normal([input_size])),
    }

    # Encoder compresses to `dimension`, decoder reconstructs the input.
    first_layer_encoder = tf.nn.sigmoid(tf.add(tf.matmul(X, weights['encoder_h1']), biases['encoder_b1']))
    second_layer_encoder = tf.nn.sigmoid(tf.add(tf.matmul(first_layer_encoder, weights['encoder_h2']), biases['encoder_b2']))
    first_layer_decoder = tf.nn.sigmoid(tf.add(tf.matmul(second_layer_encoder, weights['decoder_h1']), biases['decoder_b1']))
    second_layer_decoder = tf.nn.sigmoid(tf.add(tf.matmul(first_layer_decoder, weights['decoder_h2']), biases['decoder_b2']))
    cost = tf.reduce_mean(tf.pow(X - second_layer_decoder, 2))
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    for i in range(epoch):
        last_time = time.time()
        _, loss = sess.run([optimizer, cost], feed_dict={X: input_})
        if (i + 1) % 10 == 0:
            print('epoch:', i + 1, 'loss:', loss, 'time:', time.time() - last_time)

    # Return the bottleneck activations as the low-dimensional representation.
    vectors = sess.run(second_layer_encoder, feed_dict={X: input_})
    tf.reset_default_graph()
    return vectors
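A possible way to call this autoencoder helper, assuming the file is saved as reducedimension.py (the actual filename is not shown in this view), sketched for illustration only:

import numpy as np
from reducedimension import reducedimension   # assumed module name

prices = np.random.rand(100, 30).astype(np.float32)   # e.g. 100 days x 30 features
embedded = reducedimension(prices, dimension=2, epoch=20)
print(embedded.shape)                                  # (100, 2)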
@@ -0,0 +1,142 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DNC Cores.

These modules create a DNC core. They take input, pass parameters to the memory
access module, and integrate the output of memory to form an output.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import numpy as np
import sonnet as snt
import tensorflow as tf

import access

DNCState = collections.namedtuple('DNCState', ('access_output', 'access_state',
                                               'controller_state'))


class DNC(snt.RNNCore):
  """DNC core module.

  Contains controller and memory access module.
  """

  def __init__(self,
               access_config,
               controller_config,
               output_size,
               clip_value=None,
               name='dnc'):
    """Initializes the DNC core.

    Args:
      access_config: dictionary of access module configurations.
      controller_config: dictionary of controller (LSTM) module configurations.
      output_size: output dimension size of core.
      clip_value: clips controller and core output values to between
          `[-clip_value, clip_value]` if specified.
      name: module name (default 'dnc').

    Raises:
      TypeError: if direct_input_size is not None for any access module other
          than KeyValueMemory.
    """
    super(DNC, self).__init__(name=name)

    with self._enter_variable_scope():
      self._controller = snt.LSTM(**controller_config)
      self._access = access.MemoryAccess(**access_config)

    self._access_output_size = np.prod(self._access.output_size.as_list())
    self._output_size = output_size
    self._clip_value = clip_value or 0

    self._output_size = tf.TensorShape([output_size])
    self._state_size = DNCState(
        access_output=self._access_output_size,
        access_state=self._access.state_size,
        controller_state=self._controller.state_size)

  def _clip_if_enabled(self, x):
    if self._clip_value > 0:
      return tf.clip_by_value(x, -self._clip_value, self._clip_value)
    else:
      return x

  def _build(self, inputs, prev_state):
    """Connects the DNC core into the graph.

    Args:
      inputs: Tensor input.
      prev_state: A `DNCState` tuple containing the fields `access_output`,
          `access_state` and `controller_state`. `access_output` is a 3-D Tensor
          of shape `[batch_size, num_reads, word_size]` containing read words.
          `access_state` is a tuple of the access module's state, and
          `controller_state` is a tuple of controller module's state.

    Returns:
      A tuple `(output, next_state)` where `output` is a tensor and `next_state`
      is a `DNCState` tuple containing the fields `access_output`,
      `access_state`, and `controller_state`.
    """

    prev_access_output = prev_state.access_output
    prev_access_state = prev_state.access_state
    prev_controller_state = prev_state.controller_state

    batch_flatten = snt.BatchFlatten()
    controller_input = tf.concat(
        [batch_flatten(inputs), batch_flatten(prev_access_output)], 1)

    controller_output, controller_state = self._controller(
        controller_input, prev_controller_state)

    controller_output = self._clip_if_enabled(controller_output)
    controller_state = snt.nest.map(self._clip_if_enabled, controller_state)

    access_output, access_state = self._access(controller_output,
                                               prev_access_state)

    output = tf.concat([controller_output, batch_flatten(access_output)], 1)
    output = snt.Linear(
        output_size=self._output_size.as_list()[0],
        name='output_linear')(output)
    output = self._clip_if_enabled(output)

    return output, DNCState(
        access_output=access_output,
        access_state=access_state,
        controller_state=controller_state)

  def initial_state(self, batch_size, dtype=tf.float32):
    return DNCState(
        controller_state=self._controller.initial_state(batch_size, dtype),
        access_state=self._access.initial_state(batch_size, dtype),
        access_output=tf.zeros(
            [batch_size] + self._access.output_size.as_list(), dtype))

  @property
  def state_size(self):
    return self._state_size

  @property
  def output_size(self):
    return self._output_size
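A minimal sketch of how this core might be driven as a recurrent cell under TensorFlow 1.x and Sonnet 1.x. Illustrative only; the module name `dnc` and the configuration values are assumptions, not part of this commit.

import tensorflow as tf
from dnc import DNC   # assumed module name for the file above

access_config = dict(memory_size=16, word_size=16, num_reads=4, num_writes=1)
controller_config = dict(hidden_size=64)
batch_size, seq_len, feature_size = 4, 8, 32

core = DNC(access_config, controller_config, output_size=10, clip_value=20)
inputs = tf.placeholder(tf.float32, [batch_size, seq_len, feature_size])
initial_state = core.initial_state(batch_size)

# outputs has shape [batch_size, seq_len, 10]
outputs, final_state = tf.nn.dynamic_rnn(
    core, inputs, initial_state=initial_state, time_major=False)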
File diff suppressed for 2 files because one or more lines are too long
@@ -0,0 +1,45 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DNC util ops and modules."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf


def batch_invert_permutation(permutations):
  """Returns batched `tf.invert_permutation` for every row in `permutations`."""
  with tf.name_scope('batch_invert_permutation', values=[permutations]):
    unpacked = tf.unstack(permutations)
    inverses = [tf.invert_permutation(permutation) for permutation in unpacked]
    return tf.stack(inverses)


def batch_gather(values, indices):
  """Returns batched `tf.gather` for every row in the input."""
  with tf.name_scope('batch_gather', values=[values, indices]):
    unpacked = zip(tf.unstack(values), tf.unstack(indices))
    result = [tf.gather(value, index) for value, index in unpacked]
    return tf.stack(result)


def one_hot(length, index):
  """Return an nd array of given `length` filled with 0s and a 1 at `index`."""
  result = np.zeros(length)
  result[index] = 1
  return result
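The two batched helpers above have simple NumPy equivalents, shown here as an illustrative sketch (not part of the committed file).

import numpy as np

perms = np.array([[2, 0, 1],
                  [0, 2, 1]])
# batch_invert_permutation: row-wise inverse permutation
inv = np.empty_like(perms)
for row, p in enumerate(perms):
    inv[row, p] = np.arange(len(p))
print(inv)            # [[1 2 0], [0 2 1]]

values = np.array([[10., 20., 30.],
                   [40., 50., 60.]])
# batch_gather: row-wise gather of `values` at the given indices
gathered = np.take_along_axis(values, inv, axis=1)
print(gathered)       # [[20. 30. 10.], [40. 60. 50.]]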