MKL

Ubuntu 16.04安装MKL:

从 https://software.intel.com/en-us/mkl 下载安装包并解压。
进入解压后的目录,执行:

./install.sh

在 /etc/ld.so.conf.d 下创建名为 intel-mkl.conf 的文件,内容为

/opt/intel/mkl/lib/intel64
/opt/intel/lib/intel64

执行

ldconfig -v

source /opt/intel/mkl/bin/mklvars.sh intel64 ilp64

编译:

以官方文档中的 dgemm_example.c 为例:

/* Yields the smaller of x and y. This is a function-like macro: the selected
 * argument is evaluated twice, so do not pass expressions with side effects. */
#define min(x,y) (((x) < (y)) ? (x) : (y))  
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"

/*
 * Demonstrates a double-precision matrix product C = alpha*A*B + beta*C
 * through the CBLAS interface (cblas_dgemm), using buffers obtained from
 * mkl_malloc with 64-byte alignment.
 *
 * A is m x p, B is p x n and C is m x n, all stored row-major in flat arrays.
 * The top-left 6x6 corner of each matrix is printed after the computation.
 *
 * Returns 0 on success, 1 if any of the three buffers could not be allocated.
 */
int main(void)
{
    printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
            " Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"
            " alpha and beta are double precision scalars\n\n");

    const int m = 2000, p = 200, n = 1000;
    printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
    const double alpha = 1.0, beta = 0.0;

    /* Element counts are computed in size_t so that the products cannot
     * overflow int if the dimensions above are increased. */
    const size_t a_len = (size_t)m * (size_t)p;
    const size_t b_len = (size_t)p * (size_t)n;
    const size_t c_len = (size_t)m * (size_t)n;

    printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    double *A = (double *)mkl_malloc(a_len * sizeof(double), 64);
    double *B = (double *)mkl_malloc(b_len * sizeof(double), 64);
    double *C = (double *)mkl_malloc(c_len * sizeof(double), 64);
    if (A == NULL || B == NULL || C == NULL) {
        printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
        /* Release whichever buffers were obtained before bailing out. */
        mkl_free(A);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf (" Intializing matrix data \n\n");
    /* A holds 1, 2, 3, ... in storage order. */
    for (size_t k = 0; k < a_len; k++) {
        A[k] = (double)(k + 1);
    }

    /* B holds -1, -2, -3, ...; negate after converting to double because
     * the index is unsigned. */
    for (size_t k = 0; k < b_len; k++) {
        B[k] = -(double)(k + 1);
    }

    for (size_t k = 0; k < c_len; k++) {
        C[k] = 0.0;
    }

    printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
    /* Row-major, no transposition: the leading dimensions passed are the
     * row lengths p (for A), n (for B) and n (for C). */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                m, n, p, alpha, A, p, B, n, beta, C, n);
    printf ("\n Computations completed.\n\n");

    int i, j;

    printf (" Top left corner of matrix A: \n");
    for (i = 0; i < min(m,6); i++) {
        for (j = 0; j < min(p,6); j++) {
            printf ("%12.0f", A[j+i*p]);
        }
        printf ("\n");
    }

    printf ("\n Top left corner of matrix B: \n");
    for (i = 0; i < min(p,6); i++) {
        for (j = 0; j < min(n,6); j++) {
            printf ("%12.0f", B[j+i*n]);
        }
        printf ("\n");
    }

    printf ("\n Top left corner of matrix C: \n");
    for (i = 0; i < min(m,6); i++) {
        for (j = 0; j < min(n,6); j++) {
            printf ("%12.5G", C[j+i*n]);
        }
        printf ("\n");
    }

    printf ("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    printf (" Example completed. \n\n");
    return 0;
}

执行以下命令进行编译:

. /opt/intel/bin/compilervars.sh intel64

gcc dgemm_example.c -lmkl_rt

-------------本文结束感谢您的阅读-------------