blas.h 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. #ifndef BLAS_H
  2. #define BLAS_H
  3. #include <stdlib.h>
  4. #include "darknet.h"
  5. #ifdef GPU
  6. #include "dark_cuda.h"
  7. #include "tree.h"
  8. #endif
  9. #ifdef __cplusplus
  10. extern "C" {
  11. #endif
  12. void flatten(float *x, int size, int layers, int batch, int forward);
  13. void pm(int M, int N, float *A);
  14. float *random_matrix(int rows, int cols);
  15. void time_random_matrix(int TA, int TB, int m, int k, int n);
  16. void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
  17. void test_blas();
  18. void const_cpu(int N, float ALPHA, float *X, int INCX);
  19. void constrain_ongpu(int N, float ALPHA, float * X, int INCX);
  20. void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
  21. void mul_cpu(int N, float *X, int INCX, float *Y, int INCY);
  22. void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
  23. void copy_cpu(int N, float *X, int INCX, float *Y, int INCY);
  24. void scal_cpu(int N, float ALPHA, float *X, int INCX);
  25. void scal_add_cpu(int N, float ALPHA, float BETA, float *X, int INCX);
  26. void fill_cpu(int N, float ALPHA, float * X, int INCX);
  27. float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
  28. void test_gpu_blas();
  29. void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
  30. void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion);
  31. void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers,
  32. float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalizion);
  33. void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
  34. void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
  35. void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
  36. void add_bias(float *output, float *biases, int batch, int n, int size);
  37. void scale_bias(float *output, float *scales, int batch, int n, int size);
  38. void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
  39. void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
  40. void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
  41. void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
  42. void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
  43. void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
  44. void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
  45. void softmax(float *input, int n, float temp, float *output, int stride);
  46. void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
  47. void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
  48. void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
  49. void constrain_cpu(int size, float ALPHA, float *X);
  50. void fix_nan_and_inf_cpu(float *input, size_t size);
  51. #ifdef GPU
  52. void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
  53. void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
  54. void simple_copy_ongpu(int size, float *src, float *dst);
  55. void memcpy_ongpu(void *dst, void *src, int size_bytes);
  56. void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
  57. void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
  58. void scal_ongpu(int N, float ALPHA, float * X, int INCX);
  59. void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX);
  60. void supp_ongpu(int N, float ALPHA, float * X, int INCX);
  61. void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val);
  62. void mask_ongpu(int N, float * X, float mask_num, float * mask);
  63. void const_ongpu(int N, float ALPHA, float *X, int INCX);
  64. void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
  65. void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY);
  66. void fill_ongpu(int N, float ALPHA, float * X, int INCX);
  67. void mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
  68. void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
  69. void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
  70. void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
  71. void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
  72. void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
  73. void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
  74. void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
  75. void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
  76. void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion);
  77. void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in,
  78. float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalizion);
  79. void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
  80. void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
  81. void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
  82. void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
  83. void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
  84. void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
  85. void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error);
  86. void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
  87. void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
  88. void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
  89. void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c);
  90. void mult_add_into_gpu(int num, float *a, float *b, float *c);
  91. void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
  92. void softmax_gpu_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
  93. void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output);
  94. void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t);
  95. void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t);
  96. void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);
  97. void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
  98. void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
  99. void fix_nan_and_inf(float *input, size_t size);
  100. int is_nan_or_inf(float *input, size_t size);
  101. void add_3_arrays_activate(float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst);
  102. void sum_of_mults(float *a1, float *a2, float *b1, float *b2, size_t size, float *dst);
  103. void activate_and_mult(float *a1, float *a2, size_t size, ACTIVATION a, float *dst);
  104. void scale_channels_gpu(float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out);
  105. void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh,
  106. float *in_scales_c, float *out_from_delta,
  107. float *in_from_output, float *out_state_delta);
  108. void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_size,
  109. float *in_scales_c, float *out_from_delta,
  110. float *in_from_output, float *out_state_delta);
  111. void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out);
  112. void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
  113. void stretch_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, float scale, int reverse);
  114. void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
  115. void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse);
  116. void rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int reverse);
  117. void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
  118. void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
  119. #endif
  120. #ifdef __cplusplus
  121. }
  122. #endif
  123. #endif