Repository: RichardYang40148/MidiNet
Branch: master
Commit: 1b0b4849996d
Files: 10
Total size: 27.0 KB
Directory structure:
gitextract_b5avewm9/
└── v1/
├── README.md
├── generated_midi/
│ ├── README.md
│ ├── gen0.mid
│ ├── gen1.mid
│ └── gen2.mid
├── main.py
├── model.py
├── ops.py
├── samples/
│ └── README.md
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: v1/README.md
================================================
This repository contains the original source code of [MidiNet: A Convolutional Generative Adversarial Network for Symbolic-domain Music Generation](https://arxiv.org/abs/1703.10847).
Now you can find the pytorch implementation [in this repo](https://github.com/annahung31/MidiNet-by-pytorch).
## Follow-up research of MidiNet
If you are interested in the follow-up research of MidiNet, please check out [MuseGAN: Symbolic-domain Music Generation and Accompaniment with Multi-track Sequential Generative Adversarial Networks](https://salu133445.github.io/musegan/),
in which we give a more detailed explanation of the data format (a piano-roll-like feature with higher resolution compared to MidiNet).
## Notes
This is a slightly modified version of the model that we presented in the above paper; comments in the code point out where the parameters differ from the paper.
We also provide a preliminary result of the model, which aims to help those who are interested in implementing MidiNet get a better idea of the data format.
## Instructions
The repository contains a preliminary trained model, which was trained on only 50496 MIDI bars (augmented from 4208 bars), so the generator might not sound very "creative".
The model could be downloaded from the [link](https://drive.google.com/open?id=0B_B9afNKo7IqN205MzdLRFlOZzA).
It's quite fun to use TensorBoard to check out the model's training process:
```
tensorboard --logdir=logs/
```
You can check out the training loss and the embedding visualizations of real and fake data.
To train by your own dataset:
```
1. change lines 134-136 of model.py (the np.load calls in train) to your data paths
2. run main.py --is_train True
```
## Acknowledgment
These scripts are based on [A TensorFlow implementation of "Deep Convolutional Generative Adversarial Networks"](https://github.com/carpedm20/DCGAN-tensorflow).
Thanks to Taehoon Kim / @carpedm20 for releasing such a decent DCGAN implementation.
## Requirements
[Tensorflow 0.12](https://github.com/tensorflow/tensorflow/tree/r0.12)
[python-midi](https://github.com/vishnubob/python-midi)
================================================
FILE: v1/generated_midi/README.md
================================================
generated sample of the provided model
================================================
FILE: v1/model.py
================================================
# These scripts are based on "https://github.com/carpedm20/DCGAN-tensorflow"
from __future__ import division
import os
import time
from glob import glob
import tensorflow as tf
import numpy as np
from six.moves import xrange
import SharedArray as sa
from sklearn.utils import shuffle
from ops import *
from utils import *
class MidiNet(object):
def __init__(self, sess, is_crop=False,
             batch_size=72, sample_size=72, output_w=16, output_h=128,
             y_dim=None, prev_dim=1, z_dim=100, gf_dim=64, df_dim=64,
             gfc_dim=1024, dfc_dim=1024, c_dim=1, dataset_name='default',
             checkpoint_dir=None, sample_dir=None, gen_dir=None):
    """Record the hyper-parameters, create the batch-norm layers and build the graph.

    Args:
        sess: TensorFlow session kept on the instance for later use.
        is_crop: Stored as-is; not read anywhere else in this constructor.
        batch_size: Number of bars per training batch.
        sample_size: Number of bars in the sampling placeholder.
        output_w, output_h: Spatial size of one bar (defaults 16 x 128).
        y_dim: Size of the condition vector; falsy means "no condition".
        prev_dim: When truthy, the previous-bar batch-norm layers are created.
        z_dim: Size of the noise vector.
        gf_dim, df_dim: Base filter counts for generator / discriminator.
        gfc_dim, dfc_dim: Fully-connected layer widths.
        c_dim: Channel count of a bar; 1 marks the data as grayscale.
        dataset_name: Used to name the checkpoint sub-directory.
        checkpoint_dir: Root directory for checkpoints.
        sample_dir, gen_dir: Accepted for interface compatibility; unused here.
    """
    self.sess = sess
    self.is_crop = is_crop
    self.is_grayscale = (c_dim == 1)

    # Batch and bar geometry.
    self.batch_size, self.sample_size = batch_size, sample_size
    self.output_w, self.output_h = output_w, output_h

    # Conditioning and latent sizes.
    self.y_dim, self.prev_dim, self.z_dim = y_dim, prev_dim, z_dim

    # Network widths.
    self.gf_dim, self.df_dim = gf_dim, df_dim
    self.gfc_dim, self.dfc_dim = gfc_dim, dfc_dim
    self.c_dim = c_dim

    # batch normalization : deals with poor initialization helps gradient flow
    # Layers are created in the same order as before: d_bn*, g_prev_bn*, g_bn*.
    for layer_no in range(3):
        layer_name = 'd_bn%d' % layer_no
        setattr(self, layer_name, batch_norm(name=layer_name))
    if not self.y_dim:
        self.d_bn3 = batch_norm(name='d_bn3')

    if self.prev_dim:
        for layer_no in range(4):
            layer_name = 'g_prev_bn%d' % layer_no
            setattr(self, layer_name, batch_norm(name=layer_name))

    for layer_no in range(5):
        layer_name = 'g_bn%d' % layer_no
        setattr(self, layer_name, batch_norm(name=layer_name))

    self.dataset_name = dataset_name
    self.checkpoint_dir = checkpoint_dir
    self.build_model()
def build_model(self):
    """Create the placeholders, the generator/discriminator graphs and the losses.

    The statement order matters: the generator and the first discriminator
    create the variables, and `sampler` switches the current variable scope
    to reuse mode, so everything built after it shares those variables.
    """
    # Inputs: condition vector, previous bar, real bars, sample bars and noise.
    self.y= tf.placeholder(tf.float32, [self.batch_size, self.y_dim], name='y')
    self.prev_bar = tf.placeholder(tf.float32, [self.batch_size] + [self.output_w, self.output_h, self.c_dim],
                                   name='prev_bar')
    self.images = tf.placeholder(tf.float32, [self.batch_size] + [self.output_w, self.output_h, self.c_dim],
                                 name='real_images')
    self.sample_images= tf.placeholder(tf.float32, [self.sample_size] + [self.output_w, self.output_h, self.c_dim],
                                       name='sample_images')
    self.z = tf.placeholder(tf.float32, [None, self.z_dim],
                            name='z')
    self.z_sum = tf.summary.histogram("z", self.z)
    # Generator output, then the discriminator on real bars (creates the d_ variables).
    self.G = self.generator(self.z, self.y, self.prev_bar)
    self.D, self.D_logits, self.fm = self.discriminator(self.images, self.y, reuse=False)
    # NOTE: this rebinds `self.sampler` from the bound method to the tensor it
    # returns, so the method can no longer be called on this instance afterwards.
    self.sampler = self.sampler(self.z, self.y, self.prev_bar)
    # Discriminator on generated bars, sharing the variables created above.
    self.D_, self.D_logits_ , self.fm_ = self.discriminator(self.G, self.y, reuse=True)
    self.d_sum = tf.summary.histogram("d", self.D)
    self.d__sum = tf.summary.histogram("d_", self.D_)
    self.G_sum = tf.summary.image("G", self.G)
    # Adversarial losses. Real bars use a target of 0.9 instead of 1.
    self.d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.D_logits, 0.9*tf.ones_like(self.D)))
    self.d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.D_logits_, tf.zeros_like(self.D_)))
    self.g_loss0 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))
    #Feature Matching
    # Loss 1: L2 distance between the batch-mean discriminator features of
    # generated and real bars, weighted by 0.1.
    self.features_from_g = tf.reduce_mean(self.fm_, reduction_indices=(0))
    self.features_from_i = tf.reduce_mean(self.fm, reduction_indices=(0))
    self.fm_g_loss1 =tf.mul(tf.nn.l2_loss(self.features_from_g - self.features_from_i), 0.1)
    # Loss 2: L2 distance between the batch-mean generated bar and the
    # batch-mean real bar, weighted by 0.01.
    self.mean_image_from_g = tf.reduce_mean(self.G, reduction_indices=(0))
    self.mean_image_from_i = tf.reduce_mean(self.images, reduction_indices=(0))
    self.fm_g_loss2 = tf.mul(tf.nn.l2_loss(self.mean_image_from_g - self.mean_image_from_i), 0.01)
    self.d_loss_real_sum = tf.summary.scalar("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake", self.d_loss_fake)
    # Total losses optimised in train().
    self.d_loss = self.d_loss_real + self.d_loss_fake
    self.g_loss = self.g_loss0 + self.fm_g_loss1 + self.fm_g_loss2
    self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
    # Split the trainable variables by a substring match on their names.
    t_vars = tf.trainable_variables()
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.g_vars = [var for var in t_vars if 'g_' in var.name]
    self.saver = tf.train.Saver()
def train(self, config):
    """Train the generator and discriminator on the configured dataset.

    Each step runs one discriminator update followed by two generator
    updates, writes summaries to ./logs, dumps sample bars every 100 steps
    and saves a checkpoint at the end of every epoch.

    Args:
        config: Object providing `dataset`, `learning_rate`, `beta1`,
            `epoch`, `batch_size`, `sample_dir`, `gen_dir` and
            `checkpoint_dir`.

    Raises:
        ValueError: If `config.dataset` is not 'MidiNet_v1'.
    """
    if config.dataset == 'MidiNet_v1':
        # change the file path to your dataset
        # Shape: (n, 1, 16, 128), where n is the number of measures (bars) of training data.
        data_X = np.load('your_training_data.npy')
        # Shape: (n, 1, 16, 128); for the first bar of a piece the previous bar is np.zeros((1, 16, 128)).
        prev_X = np.load('your_training_data_previous_bar.npy')
        # 1D chord condition
        data_y = np.load('your_chord_annotation.npy')
        data_X, prev_X, data_y = shuffle(data_X, prev_X, data_y, random_state=0)
        # Move the channel axis last: (n, 1, 16, 128) -> (n, 16, 128, 1).
        data_X = np.transpose(data_X, (0, 2, 3, 1))
        prev_X = np.transpose(prev_X, (0, 2, 3, 1))
        print(prev_X.shape)
    else:
        # Fail early instead of hitting a NameError on data_X further down.
        raise ValueError("Unsupported dataset: %r (expected 'MidiNet_v1')" % (config.dataset,))

    d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                      .minimize(self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                      .minimize(self.g_loss, var_list=self.g_vars)
    tf.global_variables_initializer().run()

    self.g_sum = tf.summary.merge([self.z_sum, self.d__sum,
                                   self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = tf.summary.merge([self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = tf.summary.FileWriter("./logs", self.sess.graph)

    # Fixed noise and real bars reused for every periodic sample dump.
    sample_z = np.random.normal(0, 1, size=(self.sample_size, self.z_dim))
    save_images(data_X[:5] * 1, [1, 5],
                './{}/Train.png'.format(config.sample_dir))
    sample_images = data_X[0:self.sample_size]

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    sample_labels = sloppy_sample_labels()

    for epoch in xrange(config.epoch):
        # Integer number of full batches; a trailing partial batch is dropped.
        batch_idxs = len(data_X) // config.batch_size
        for idx in xrange(0, batch_idxs):
            lo = idx * config.batch_size
            hi = (idx + 1) * config.batch_size
            batch_images = data_X[lo:hi]
            prev_batch_images = prev_X[lo:hi]
            batch_labels = data_y[lo:hi]

            # Note that the mu and sigma are set to (-1,1) in the experiment of the paper:
            # "MidiNet: A Convolutional Generative Adversarial Network for Symbolic-domain Music Generation"
            # However, the results are similar when using (0,1).
            batch_z = np.random.normal(0, 1, [config.batch_size, self.z_dim]) \
                        .astype(np.float32)

            step_feed = {self.images: batch_images, self.z: batch_z,
                         self.y: batch_labels, self.prev_bar: prev_batch_images}

            # Update D network
            _, summary_str = self.sess.run([d_optim, self.d_sum], feed_dict=step_feed)
            self.writer.add_summary(summary_str, counter)

            # Update G network
            _, summary_str = self.sess.run([g_optim, self.g_sum], feed_dict=step_feed)
            self.writer.add_summary(summary_str, counter)

            # Run g_optim twice to make sure that d_loss does not go to zero (different from paper)
            # We've tried to run more d_optim and g_optim, while getting a better result by
            # running g_optim twice in this MidiNet version.
            _, summary_str = self.sess.run([g_optim, self.g_sum], feed_dict=step_feed)
            self.writer.add_summary(summary_str, counter)

            errD_fake = self.d_loss_fake.eval({self.z: batch_z, self.y: batch_labels,
                                               self.prev_bar: prev_batch_images})
            errD_real = self.d_loss_real.eval({self.images: batch_images, self.y: batch_labels})
            errG = self.g_loss.eval(step_feed)

            counter += 1
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                  % (epoch, idx, batch_idxs,
                     time.time() - start_time, errD_fake+errD_real, errG))

            if np.mod(counter, 100) == 1:
                samples, d_loss, g_loss = self.sess.run(
                    [self.sampler, self.d_loss, self.g_loss],
                    feed_dict={self.z: sample_z, self.images: sample_images,
                               self.y: sample_labels, self.prev_bar: prev_batch_images}
                )
                #samples = (samples+1.)/2.
                save_images(samples[:5, :], [1, 5],
                            './{}/train_{:02d}_{:04d}.png'.format(config.sample_dir, epoch, idx))
                print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))
                np.save('./{}/train_{:02d}_{:04d}'.format(config.gen_dir, epoch, idx), samples)

            # Checkpoint once per epoch. The modulus must be the integer batch
            # count: with true division enabled, len(data_X)/batch_size is a
            # float, so the remainder is essentially never exactly zero unless
            # the dataset size is a multiple of the batch size.
            if np.mod(counter, batch_idxs) == 0:
                self.save(config.checkpoint_dir, counter)

        print("Epoch: [%2d] time: %4.4f, d_loss: %.8f" \
              % (epoch,
                 time.time() - start_time, (errD_fake+errD_real)/batch_idxs))
def discriminator(self, x, y=None, reuse=False):
    """Build the discriminator graph on a batch of bars.

    Args:
        x: Batch of bars to classify.
        y: Condition vector, required when `self.y_dim` is set.
        reuse: When True, switch the current variable scope to reuse mode so
            the variables of an earlier call are shared.

    Returns:
        A 3-tuple `(probability, logits, feature_map)`, where `feature_map`
        is the activation of the first convolution (used for feature
        matching). Both branches return the same arity.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    if not self.y_dim:
        # NOTE(review): this unconditional path is not exercised by
        # build_model (which always feeds y); kernel sizes here look
        # untested against the default bar size, so confirm before relying on it.
        h0 = lrelu(self.d_bn0(conv2d(x, 64, k_h=4, k_w=89, name='d_h0_conv')))
        h1 = lrelu(self.d_bn1(conv2d(h0, 64, k_h=4, k_w=1, name='d_h1_conv')))
        h2 = lrelu(self.d_bn2(conv2d(h1, 64, k_h=4, k_w=1, name='d_h2_conv')))
        h3 = linear(tf.reshape(h2, [self.batch_size, -1]), 1, 'd_h3_lin')
        # Return the first-layer activation as the feature map so callers can
        # unpack three values regardless of the branch taken.
        return tf.nn.sigmoid(h3), h3, h0

    # Conditional path: the condition is appended as extra channels before
    # each convolution and as extra columns before each dense layer.
    yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
    x = conv_cond_concat(x, yb)

    h0 = lrelu(conv2d(x, self.c_dim + self.y_dim, k_h=2, k_w=128, name='d_h0_conv'))
    fm = h0  # feature map captured before the condition is re-attached
    h0 = conv_cond_concat(h0, yb)

    h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim + self.y_dim, k_h=4, k_w=1, name='d_h1_conv')))
    h1 = tf.reshape(h1, [self.batch_size, -1])
    h1 = tf.concat(1, [h1, y])

    h2 = lrelu(self.d_bn2(linear(h1, self.dfc_dim, 'd_h2_lin')))
    h2 = tf.concat(1, [h2, y])

    h3 = linear(h2, 1, 'd_h3_lin')
    return tf.nn.sigmoid(h3), h3, fm
def generator(self, z, y=None, prev_x=None):
    """Build the generator graph: noise + condition + previous bar -> new bar.

    Args:
        z: Noise batch.
        y: Condition vector; despite the default it is required, since it is
            reshaped and concatenated unconditionally.
        prev_x: Previous-bar batch; also required in practice.

    Returns:
        Sigmoid output of the last transposed convolution, shaped
        `[batch_size, 16, 128, c_dim]`.
    """
    batch = self.batch_size
    width = self.gf_dim * 2

    # Conditioner: four convolutions over the previous bar. Their outputs are
    # concatenated into the generator at the matching heights below.
    prev0 = lrelu(self.g_prev_bn0(conv2d(prev_x, 16, k_h=1, k_w=128, d_h=1, d_w=2, name='g_h0_prev_conv')))
    prev1 = lrelu(self.g_prev_bn1(conv2d(prev0, 16, k_h=2, k_w=1, name='g_h1_prev_conv')))
    prev2 = lrelu(self.g_prev_bn2(conv2d(prev1, 16, k_h=2, k_w=1, name='g_h2_prev_conv')))
    prev3 = lrelu(self.g_prev_bn3(conv2d(prev2, 16, k_h=2, k_w=1, name='g_h3_prev_conv')))

    cond_map = tf.reshape(y, [batch, 1, 1, self.y_dim])

    # Two dense layers, each fed with the condition vector appended.
    dense_in = tf.concat(1, [z, y])
    dense0 = tf.nn.relu(self.g_bn0(linear(dense_in, 1024, 'g_h0_lin')))
    dense0 = tf.concat(1, [dense0, y])
    dense1 = tf.nn.relu(self.g_bn1(linear(dense0, self.gf_dim*2*2*1, 'g_h1_lin')))

    feat = tf.reshape(dense1, [batch, 2, 1, width])
    feat = conv_cond_concat(feat, cond_map)
    feat = conv_prev_concat(feat, prev3)

    # Three transposed convolutions doubling the height: 2 -> 4 -> 8 -> 16.
    stages = (
        (4, self.g_bn2, 'g_h2', prev2),
        (8, self.g_bn3, 'g_h3', prev1),
        (16, self.g_bn4, 'g_h4', prev0),
    )
    for height, norm, layer_name, prev_feat in stages:
        feat = tf.nn.relu(norm(deconv2d(feat, [batch, height, 1, width],
                                        k_h=2, k_w=1, d_h=2, d_w=2, name=layer_name)))
        feat = conv_cond_concat(feat, cond_map)
        feat = conv_prev_concat(feat, prev_feat)

    # Final layer expands the single column to 128 columns.
    return tf.nn.sigmoid(deconv2d(feat, [batch, 16, 128, self.c_dim],
                                  k_h=1, k_w=128, d_h=1, d_w=2, name='g_h5'))
def sampler(self, z, y=None, prev_x=None):
    """Build a second generator graph that shares the trained variables.

    The previous body was a line-for-line copy of `generator`; delegating
    keeps the two graphs in sync by construction.

    Args:
        z: Noise batch.
        y: Condition vector (required by `generator`).
        prev_x: Previous-bar batch (required by `generator`).

    Returns:
        The tensor returned by `generator` for these inputs.
    """
    # Switch the current variable scope to reuse mode so the call below picks
    # up the existing g_* variables instead of trying to create new ones.
    tf.get_variable_scope().reuse_variables()
    return self.generator(z, y, prev_x)
def save(self, checkpoint_dir, step):
    """Write a checkpoint for the current session.

    The checkpoint goes into a sub-directory of `checkpoint_dir` named after
    the dataset name, batch size and output width; the directory is created
    when missing.

    Args:
        checkpoint_dir: Root directory for checkpoints.
        step: Global step recorded with the checkpoint.
    """
    run_tag = "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_w)
    target_dir = os.path.join(checkpoint_dir, run_tag)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    target_path = os.path.join(target_dir, "MidiNet.model")
    self.saver.save(self.sess, target_path, global_step=step)
def load(self, checkpoint_dir):
    """Restore the latest checkpoint for this model configuration, if any.

    Args:
        checkpoint_dir: Root directory for checkpoints; the same per-run
            sub-directory naming as `save` is applied.

    Returns:
        True when a checkpoint was found and restored, False otherwise.
    """
    print(" [*] Reading checkpoints...")

    run_tag = "%s_%s_%s" % (self.dataset_name, self.batch_size, self.output_w)
    source_dir = os.path.join(checkpoint_dir, run_tag)

    state = tf.train.get_checkpoint_state(source_dir)
    if not (state and state.model_checkpoint_path):
        return False

    # Only the file name is taken from the recorded path; it is re-joined
    # with the directory computed above.
    latest_name = os.path.basename(state.model_checkpoint_path)
    self.saver.restore(self.sess, os.path.join(source_dir, latest_name))
    return True
================================================
FILE: v1/ops.py
================================================
# These scripts are based on "https://github.com/carpedm20/DCGAN-tensorflow"
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from utils import *
class batch_norm(object):
    """Callable wrapper around `tf.contrib.layers.batch_norm` with a fixed scope.

    Each instance remembers its scope name, so calling it again under a
    reusing variable scope addresses the same batch-norm variables.
    """

    def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"):
        """Store the layer settings.

        Args:
            epsilon: Value passed as `epsilon` to the underlying layer.
            momentum: Value passed as `decay` to the underlying layer.
            name: Variable scope used for the layer's variables.
        """
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        """Apply batch normalization to `x`.

        Args:
            x: Input tensor.
            train: Forwarded as `is_training`. It used to be ignored, which
                meant the layer always ran in training mode; the default
                keeps that behaviour for existing callers.

        Returns:
            The normalized tensor.
        """
        return tf.contrib.layers.batch_norm(x,
                                            decay=self.momentum,
                                            # None makes the moving-average updates run in place.
                                            updates_collections=None,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            is_training=train,
                                            scope=self.name)
def binary_cross_entropy(preds, targets, name=None):
    """Compute the mean binary cross entropy between `preds` and `targets`.

    With `p = preds` and `t = targets` the value is

        mean_i( -(t[i] * log(p[i] + eps) + (1 - t[i]) * log(1 - p[i] + eps)) )

    where `eps = 1e-12` keeps the logarithms finite.

    Args:
        preds: A `Tensor` of type `float32` or `float64`.
        targets: A `Tensor` of the same type and shape as `preds`.
        name: Optional name for the op scope (defaults to "bce_loss").

    Returns:
        A scalar tensor holding the mean loss.
    """
    eps = 1e-12
    with ops.op_scope([preds, targets], name, "bce_loss") as name:
        preds = ops.convert_to_tensor(preds, name="preds")
        targets = ops.convert_to_tensor(targets, name="targets")
        positive_term = targets * tf.log(preds + eps)
        negative_term = (1. - targets) * tf.log(1. - preds + eps)
        return tf.reduce_mean(-(positive_term + negative_term))
def conv_cond_concat(x, y):
    """Append the condition `y` to `x` as extra channels (axis 3).

    `y` is multiplied by a ones tensor sized from `x`'s first three
    dimensions and `y`'s channel count, then concatenated onto `x`.
    """
    x_dims = x.get_shape()
    cond_channels = y.get_shape()[3]
    tiled_cond = y * tf.ones([x_dims[0], x_dims[1], x_dims[2], cond_channels])
    return tf.concat(3, [x, tiled_cond])
def conv_prev_concat(x, y):
    """Append previous-bar features `y` to `x` as extra channels (axis 3).

    Args:
        x: Generator feature map.
        y: Feature map computed from the previous bar; its first two
            dimensions must equal those of `x`.

    Returns:
        `x` with `y` concatenated along axis 3.

    Raises:
        ValueError: If the first two dimensions of `x` and `y` differ. The
            function used to print both shapes and return None, which only
            surfaced later as an unrelated error in the next layer.
    """
    x_shapes = x.get_shape()
    y_shapes = y.get_shape()
    if x_shapes[:2] != y_shapes[:2]:
        raise ValueError(
            "conv_prev_concat: leading dimensions differ: %s vs %s"
            % (x_shapes[:2], y_shapes[:2]))
    return tf.concat(3, [x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])])
def conv2d(input_, output_dim,
           k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
           name="conv2d"):
    """Apply a VALID-padded 2-D convolution plus bias inside scope `name`.

    Args:
        input_: Input tensor; its last dimension is taken as the input
            channel count.
        output_dim: Number of output channels.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Strides along height and width.
        stddev: Standard deviation of the truncated-normal kernel initializer.
        name: Variable scope holding the variables 'w' and 'biases'.

    Returns:
        The convolved tensor with the bias added.
    """
    in_channels = input_.get_shape()[-1]
    with tf.variable_scope(name):
        kernel = tf.get_variable('w', [k_h, k_w, in_channels, output_dim],
                                 initializer=tf.truncated_normal_initializer(stddev=stddev))
        raw = tf.nn.conv2d(input_, kernel, strides=[1, d_h, d_w, 1], padding='VALID')

        offsets = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        biased = tf.nn.bias_add(raw, offsets)
        # Reshape to the convolution's own static shape.
        return tf.reshape(biased, raw.get_shape())
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False, pad = 'VALID'):
    """Apply a transposed 2-D convolution plus bias inside scope `name`.

    Args:
        input_: Input tensor; its last dimension is the input channel count.
        output_shape: Full output shape; its last entry is the output
            channel count.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Strides along height and width.
        stddev: Standard deviation of the normal kernel initializer.
        name: Variable scope holding the variables 'w' and 'biases'.
        with_w: When True, also return the kernel and bias variables.
        pad: Padding mode forwarded to the transposed convolution.

    Returns:
        The output tensor, or `(output, kernel, biases)` when `with_w` is True.
    """
    out_channels = output_shape[-1]
    in_channels = input_.get_shape()[-1]
    strides = [1, d_h, d_w, 1]

    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        kernel = tf.get_variable('w', [k_h, k_w, out_channels, in_channels],
                                 initializer=tf.random_normal_initializer(stddev=stddev))
        try:
            transposed = tf.nn.conv2d_transpose(input_, kernel, output_shape=output_shape,
                                                strides=strides, padding=pad)
        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            transposed = tf.nn.deconv2d(input_, kernel, output_shape=output_shape,
                                        strides=strides, padding=pad)

        offsets = tf.get_variable('biases', [out_channels], initializer=tf.constant_initializer(0.0))
        result = tf.reshape(tf.nn.bias_add(transposed, offsets), transposed.get_shape())

        if with_w:
            return result, kernel, offsets
        return result
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU: element-wise maximum of `x` and `leak * x`.

    `name` is accepted for interface compatibility and is not used.
    """
    leaked = leak*x
    return tf.maximum(x, leaked)
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Apply a dense layer `input_ @ Matrix + bias`.

    Args:
        input_: 2-D input tensor; its second dimension is the input width.
        output_size: Number of output units.
        scope: Variable scope name; "Linear" is used when falsy.
        stddev: Standard deviation of the normal weight initializer.
        bias_start: Constant initial value of the bias.
        with_w: When True, also return the weight and bias variables.

    Returns:
        The projected tensor, or `(projected, Matrix, bias)` when `with_w`
        is True.
    """
    in_features = input_.get_shape().as_list()[1]

    with tf.variable_scope(scope or "Linear"):
        weights = tf.get_variable("Matrix", [in_features, output_size], tf.float32,
                                  tf.random_normal_initializer(stddev=stddev))
        offset = tf.get_variable("bias", [output_size],
                                 initializer=tf.constant_initializer(bias_start))
        projected = tf.matmul(input_, weights) + offset
        return (projected, weights, offset) if with_w else projected
================================================
FILE: v1/samples/README.md
================================================
Training data:
epoch 1:
epoch 5:
epoch 10:
epoch 15:
epoch 20: