You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

tf_dcn_criteo.py 2.8 kB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import tensorflow as tf
  2. def cross_layer(x0, x1, device):
  3. # x0: input embedding feature (batch_size, 26 * embedding_size + 13)
  4. # x1: the output of last layer (batch_size, 26 * embedding_size + 13)
  5. embed_dim = x1.shape[-1]
  6. with tf.device(device):
  7. w = tf.compat.v1.get_variable(name='w', shape=(embed_dim,))
  8. b = tf.compat.v1.get_variable(name='b', shape=(embed_dim,))
  9. x_1w = tf.tensordot(tf.reshape(x1, [-1, 1, embed_dim]), w, axes=1)
  10. cross = x0 * x_1w
  11. return cross + x1 + b
  12. def build_cross_layer(x0, num_layers=3, device=tf.device('/gpu:0')):
  13. x1 = x0
  14. for i in range(num_layers):
  15. with tf.compat.v1.variable_scope('layer%d' % i):
  16. x1 = cross_layer(x0, x1, device)
  17. return x1
  18. def dcn_criteo(dense_input, sparse_input, y_, partitioner=None, part_all=True, param_on_gpu=True):
  19. feature_dimension = 33762577
  20. embedding_size = 128
  21. learning_rate = 0.003 / 8 # here to comply with HETU
  22. all_partitioner, embed_partitioner = (
  23. partitioner, None) if part_all else (None, partitioner)
  24. with tf.compat.v1.variable_scope('dcn', dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=0.01), partitioner=all_partitioner):
  25. with tf.device('/cpu:0'):
  26. Embedding = tf.compat.v1.get_variable(name="Embedding", shape=(
  27. feature_dimension, embedding_size), partitioner=embed_partitioner)
  28. sparse_input_embedding = tf.nn.embedding_lookup(
  29. Embedding, sparse_input)
  30. device = '/gpu:0' if param_on_gpu else '/cpu:0'
  31. with tf.device(device):
  32. W1 = tf.compat.v1.get_variable(
  33. name='W1', shape=[26*embedding_size + 13, 256])
  34. W2 = tf.compat.v1.get_variable(name='W2', shape=[256, 256])
  35. W3 = tf.compat.v1.get_variable(name='W3', shape=[256, 256])
  36. W4 = tf.compat.v1.get_variable(
  37. name='W4', shape=[256 + 26 * embedding_size + 13, 1])
  38. with tf.device('/gpu:0'):
  39. flatten = tf.reshape(sparse_input_embedding,
  40. (-1, 26*embedding_size))
  41. x = tf.concat((flatten, dense_input), 1)
  42. # CrossNet
  43. cross_output = build_cross_layer(x, num_layers=3, device=device)
  44. # DNN
  45. flatten = x
  46. fc1 = tf.matmul(flatten, W1)
  47. relu1 = tf.nn.relu(fc1)
  48. fc2 = tf.matmul(relu1, W2)
  49. relu2 = tf.nn.relu(fc2)
  50. y3 = tf.matmul(relu2, W3)
  51. y4 = tf.concat((cross_output, y3), 1)
  52. y = tf.matmul(y4, W4)
  53. loss = tf.reduce_mean(
  54. tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=y_))
  55. optimizer = tf.compat.v1.train.GradientDescentOptimizer(
  56. learning_rate)
  57. return loss, y, optimizer

分布式深度学习系统

Contributors (1)