/**
 * @file  dgemm_client_thread.cc
 *
 * @brief   BLAS/dgemm server : a DIET client for dgemm using threads
 *
 * @author   Benjamin DEPARDON (Benjamin.Depardon@ens-lyon.fr)
 *           Philippe COMBES (Philippe.Combes@ens-lyon.fr)
 *
 * @section Licence
 *
 * Copyright Inria, ENS Lyon and UCBL (2000-2017) 
 * Copyright SysFera (2010-2015)
 *
 * - Eddy.Caron@ens-lyon.fr (Project Manager)
 *
 * This software is a computer program whose purpose is to provide an
 * easy and transparent access to distributed and heterogeneous
 * platforms.
 *
 *
 * This software is governed by the CeCILL license under French law and
 * abiding by the rules of distribution of free software.  You can  use,
 * modify and/ or redistribute the software under the terms of the CeCILL
 * license as circulated by CEA, CNRS and INRIA at the following URL
 * "http://www.cecill.info".
 *
 * As a counterpart to the access to the source code and  rights to copy,
 * modify and redistribute granted by the license, users are provided
 * only with a limited warranty  and the software's author,  the holder
 * of the economic rights,  and the successive licensors  have only
 * limited liability.
 *
 * In this respect, the user's attention is drawn to the risks
 * associated with loading,  using,  modifying and/or developing or
 * reproducing the software by the user in light of its specific status
 * of free software, that may mean  that it is complicated to
 * manipulate, and  that  also therefore means  that it is reserved for
 * developers and experienced professionals having in-depth computer
 * knowledge. Users are therefore encouraged to load and test the
 * software's suitability as regards their requirements in conditions
 * enabling the security of their systems and/or data to be ensured and,
 * more generally, to use and operate it in the same conditions as
 * regards security.
 *
 * The fact that you are presently reading this means that you have had
 * knowledge of the CeCILL license and that you accept its terms.
 *
 */


#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <time.h>

#include <pthread.h>


#include "DIET_client.h"


/**
 * Print a matrix of doubles on stdout, one matrix row per output line,
 * preceded by a header line giving the name of the array expression and
 * its storage order.
 *
 *   mat : array (or pointer) holding at least (m) * (n) doubles
 *   m   : number of rows
 *   n   : number of columns
 *   rm  : non-zero when mat is stored row-major, zero when column-major
 *
 * The expansion is a single statement (do { ... } while (0)), so it can
 * be used safely as the body of an unbraced if/else.  The arguments are
 * evaluated several times: do not pass expressions with side effects.
 * The loop counters carry a pm_ prefix so that they do not shadow
 * variables named i or j used in the caller's arguments.
 */
#define print_matrix(mat, m, n, rm)                                   \
  do {                                                                \
    size_t pm_row_, pm_col_;                                          \
    printf("%s (%s-major) = \n", # mat,                               \
           (rm) ? "row" : "column");                                  \
    for (pm_row_ = 0; pm_row_ < (m); pm_row_++) {                     \
      for (pm_col_ = 0; pm_col_ < (n); pm_col_++) {                   \
        if (rm) {                                                     \
          printf("%3f ", (mat)[pm_col_ + pm_row_ * (n)]);             \
        } else {                                                      \
          printf("%3f ", (mat)[pm_row_ + pm_col_ * (m)]);             \
        }                                                             \
      }                                                               \
      printf("\n");                                                   \
    }                                                                 \
    printf("\n");                                                     \
  } while (0)

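/* argv[1]: client config file path
 * argv[2], argv[3], argv[4]: matrix dimensions m, n and k
 * argv[5], argv[6]: scalars alpha and beta
 * argv[7]: number of client threads to start */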


/* Matrix dimensions, filled in by main() from argv[2..4] and read by
 * call(): A is m x k, B is k x n and C is m x n. */
static size_t m;
static size_t n;
static size_t k;

/* Scalar arguments of the dgemm request, filled in by main() from
 * argv[5] and argv[6]. */
static double alpha;
static double beta;

/* Running count of call() invocations; each invocation takes the
 * current value as its id for log messages and then increments it. */
static unsigned int nb = 0;


/**
 * Thread entry point: build one "dgemm" profile and submit it to DIET
 * over and over.
 *
 * The function reads the file-scope m, n, k, alpha and beta.  It
 * allocates A (m x k, row-major), B (k x n, column-major) and C (m x n,
 * row-major), fills them with the consecutive values 1.0, 2.0, 3.0, ...
 * (the sequence runs on from A to B to C) and binds them to a profile
 * as parameters 0..4: alpha, A, B, beta, C.
 *
 * Once the set-up has succeeded the function loops forever, printing a
 * line and calling diet_call() on the same profile at every iteration;
 * the status returned by diet_call() is not acted upon.
 *
 * @param par  unused
 * @return NULL, and only when the set-up fails (allocation of the
 *         matrices or of the profile); otherwise the function never
 *         returns.
 */
void *
call(void *par) {
  /* nb is shared by every thread running call(): the read-and-increment
   * is done under a lock so that each thread gets a distinct id and the
   * counter is not accessed concurrently without synchronisation. */
  static pthread_mutex_t nb_lock = PTHREAD_MUTEX_INITIALIZER;
  diet_profile_t *profile = NULL;
  /* A writable array instead of a pointer to a string literal: it can be
   * passed wherever a char * or a const char * is expected. */
  char path[] = "dgemm";
  double *A = NULL;
  double *B = NULL;
  double *C = NULL;
  diet_matrix_order_t oA, oB, oC;
  size_t i, j;
  unsigned int id;

  (void) par;

  pthread_mutex_lock(&nb_lock);
  id = nb++;
  pthread_mutex_unlock(&nb_lock);

  oA = DIET_ROW_MAJOR;
  oB = DIET_COL_MAJOR;
  oC = DIET_ROW_MAJOR;

  A = (double *) calloc(m * k, sizeof(double));
  B = (double *) calloc(k * n, sizeof(double));
  C = (double *) calloc(m * n, sizeof(double));
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "Thread %u: cannot allocate the matrices\n", id);
    goto cleanup;
  }

  /* Fill A, B and C with a deterministic sequence: 1.0, 2.0, 3.0, ... */
  for (i = j = 0; i < m * k; i++) {
    A[i] = 1.0 + j++;
  }
  for (i = 0; i < k * n; i++) {
    B[i] = 1.0 + j++;
  }
  for (i = 0; i < m * n; i++) {
    C[i] = 1.0 + j++;
  }

  profile = diet_profile_alloc(path, 3, 4, 4);
  if (profile == NULL) {
    fprintf(stderr, "Thread %u: cannot allocate the profile\n", id);
    goto cleanup;
  }

  diet_scalar_set(diet_parameter(profile, 0), &alpha,
                  DIET_VOLATILE, DIET_DOUBLE);
  diet_matrix_set(diet_parameter(profile, 1), A,
                  DIET_VOLATILE, DIET_DOUBLE, m, k, oA);
  diet_matrix_set(diet_parameter(profile, 2), B,
                  DIET_VOLATILE, DIET_DOUBLE, k, n, oB);
  diet_scalar_set(diet_parameter(profile, 3), &beta,
                  DIET_VOLATILE, DIET_DOUBLE);
  diet_matrix_set(diet_parameter(profile, 4), C,
                  DIET_VOLATILE, DIET_DOUBLE, m, n, oC);

  /* Endless submission loop: there is no exit condition, the thread
   * keeps calling the service until the process is stopped. */
  for (;;) {
    printf("Calling DGEMM (%u)\n", id);

    if (!diet_call(profile)) {
      /* Debugging aid: dump the result with
       *   print_matrix(C, m, n, (oC == DIET_ROW_MAJOR)); */
    }
  }

cleanup:
  /* Reached only from the set-up failure paths above. */
  if (profile != NULL) {
    diet_profile_free(profile);
  }
  free(A);
  free(B);
  free(C);
  return NULL;
} // call


/**
 * Parse a strictly positive decimal integer from a command-line
 * argument.
 *
 * @param text  NUL-terminated argument text
 * @param out   receives the parsed value on success; untouched on failure
 * @return 0 on success, -1 when text is empty, is not entirely a decimal
 *         integer, or denotes a value <= 0.  Values too large for a long
 *         are clamped by strtol() rather than rejected.
 */
static int
parse_positive(const char *text, size_t *out) {
  char *end = NULL;
  long value = strtol(text, &end, 10);

  if (end == text || *end != '\0' || value <= 0) {
    return -1;
  }
  *out = (size_t) value;
  return 0;
}

/**
 * Program entry point.
 *
 * Usage: <prog> <file.cfg> m n k alpha beta nbThreads
 *
 * Stores m, n, k, alpha and beta in the file-scope variables, initializes
 * DIET with the given configuration file, starts nbThreads threads
 * running call(), waits for them and finalizes DIET.  call() loops
 * forever once its set-up has succeeded, so the joins normally block
 * until the process is stopped from outside.
 *
 * @return 0 on success; 1 on a usage error, an invalid m/n/k/nbThreads,
 *         a DIET initialization failure, an allocation failure or a
 *         thread that could not be created.
 */
int
main(int argc, char *argv[]) {
  unsigned int nbThreads, nbStarted, i;
  size_t threadCount = 0;
  pthread_t *thread = NULL;
  int status = 0;

  srand((unsigned int) time(NULL));

  if (argc != 8) {
    fprintf(stderr, "Usage: %s <file.cfg> m n k alpha beta nbThreads\n",
            argv[0]);
    return 1;
  }

  /* Parsing of m, n, k, alpha, beta and the number of threads */
  if (parse_positive(argv[2], &m) || parse_positive(argv[3], &n) ||
      parse_positive(argv[4], &k) || parse_positive(argv[7], &threadCount)) {
    fprintf(stderr, "%s: m, n, k and nbThreads must be positive integers\n",
            argv[0]);
    return 1;
  }
  nbThreads = (unsigned int) threadCount;
  if ((size_t) nbThreads != threadCount) {
    fprintf(stderr, "%s: nbThreads is too large\n", argv[0]);
    return 1;
  }
  alpha = strtod(argv[5], NULL);
  beta = strtod(argv[6], NULL);

  printf("nb threads %u\n", nbThreads);

  if (diet_initialize(argv[1], argc, argv)) {
    fprintf(stderr, "DIET initialization failed !\n");
    return 1;
  }

  thread = (pthread_t *) calloc(nbThreads, sizeof(pthread_t));
  if (thread == NULL) {
    fprintf(stderr, "Cannot allocate the thread table\n");
    diet_finalize();
    return 1;
  }

  /* Only the first nbStarted entries of thread[] hold valid handles, so
   * a failed pthread_create() never leads to joining garbage. */
  nbStarted = 0;
  for (i = 0; i < nbThreads; ++i) {
    int err = pthread_create(&thread[nbStarted], NULL, call, NULL);
    if (err != 0) {
      fprintf(stderr, "Cannot create thread %u: %s\n", i, strerror(err));
      status = 1;
      break;
    }
    ++nbStarted;
  }

  for (i = 0; i < nbStarted; ++i) {
    pthread_join(thread[i], NULL);
  }

  free(thread);
  diet_finalize();

  return status;
} // main
