...

OpenMP

by user

on
Category: Documents
16

views

Report

Comments

Description

Transcript

OpenMP
OpenMP
ę*žƎŠƊƃƐŠ!ÿď
Â¥SVęáýV¹ÁŪƐŬƒ
{ięÖ
1
ŋħĮ
•! я
•! *žƎŠƊƃƐŠî!ÿ
–! OpenMP
•! OpenžƎŠƊƃƐŠĿ™Ü
•! Advanced Topics
–! SMPşƊŨŬĚHybrid Programming
–! OpenMP 3.0ę(task)
•! ňĻŊ
2
áÃĿčö5Ļŀ
•! ţƐŻƈƒŬĿčö5
–! űŸřŨ
–! áÝŗƒŞŰşŮƇ
ŹřžƊřƐĚ
ŨƒŹŨŝƊ
•! áÝŗƒŞŰşŮƇĿčö5Ŀ•íŀĚĠ
ƂƌŮƑţŗ
œĠœĽ'¯ŕ?ľŌőĪĻ
–! CPUĿ
–! ŮůžĿ
–! Ůůžā
–! ţƐŻƈƒŬā
"“ƅƆƋ
*ţƐŻƈƒŬ
)ƒƅƆƋ*ţƐŻƈƒŬ
ŠƋůų
3
žƎŪůŤ¹ÁĀ¶Ŀ4@
•! şƎůşĿčö5ĚÚ÷žƎŪŨĿnÊ5
–! Ġňĺŀ3GHz, „dĿġĶľ10GHzĤƓƟ
•! řƐŰƌĿw³ĿðęĘęƂƌŮţŗ
–! žƎŪŨŀ65nmĘ45nm , \–·ľŀ32nm(20?)
•! ŲƊƐŧŨŬ„ŀNĢőƓ
•! ŗƒŞŰşŮƇĿ‚Ô
–! ŨƒŹƒŹřžƊřƐĚŨƒŹƒŨŝƊĚVLIW!
–! ŞƇůŦƈĿR 5ĚƂřşƎžƎŪůŤĺŋL3ŞƇůŦƈ
–! ƂƌŮŨƍůų5ĚIntel Hyperthreading
•! ۄĿžƎŠƊƄŕ?ľ'¯
–! ƂƌŮţŗƛƘĸĿŮůžľÛ„ĿCPU
4
řƐŰƌ® Pentium® žƎŪůŤď
ŚşŨŲƋƒƄƑŚűŘŦƉƐĿŭřď
ĽIJĚ*5įőĿĤƟ
4ĸĿţŗĥğŒŁĚƙƓ
5
*5ĿŜƒŸſůų
õĥ
pÜľĽőĻ
ìÕľŁŏĸĦ
ĥğőĻ
6
*žƎŠƊƃƐŠĿpÜr
•! *'¯ĥpÜĽţƐŻƈƒŬĿŽ8
–! şƊŨŬ
•! éĺŋĚşƊŨŬĥŒő
–! ƂƌŮƑţŗ
•! ƘĸĿŮůžľÛ„ĿCPUĥƓ
–! ŤƒŸ
•! ĠňĺŀŇĻŖļĥƂƌŮžƎŪůŤ
•! ĪŒŏŕĠĪĽįĴŊľŀļġįŒŁĠĠĿĤƟ
7
*žƎŠƊƃƐŠƆűƌ
8
*žƎŠƊƃƐŠƑƆűƌ
•! ƅůŪƒŧõď(Message Passing)
–! ƅůŪƒŧĿŌŐ:ŐĺŌŐ:ŐŕĭĹĚžƎŠƊƄįő
–!
–!
–!
–!
)ƒƅƆƋŦŨŰƄƔ"“ƅƆƋĺŋĚ;ƕ
žƎŠƊƃƐŠĥĈĚćĭĠ
žƎŠƊƂĥűƒŬĿ¾4ŕ.m
žƎŪůŤ„ľ[ĭĹŨšƒƊŽƌ
•! "“ƅƆƋď(shared memory)
–! "õľŗşŪŨĺĦőƅƆƋŕßĭĹĚűƒŬĿŌŐ:Ő
–!
–!
–!
–!
"“ƅƆƋŦŨŰƄƔDSMŦŨŰƄon)ƒƅƆƋƕ
žƎŠƊƃƐŠĭŌįĠƔôžžƎŠƊƄĤŏƕ
ŦŨŰƄĥűƒŬĿ¾4ŕ×ķĹħŒő
žƎŪůŤ„ľ[ĭĹŨšƒƊŽƌĺŀĽĠĪĻĥRĠě
9
*'¯ĿÆ6Ľ
for(i=0;i<1000; i++)
S += A[i];
1
2
3
4
1000
+
1
2
250
+
251
500
+
501
750
+
+
S
751
S
1000
+
10
マルチスレッドプログラミング
•! スレッド
–! øĿžƎŠƊƄĿX×ŕzë5ĭĴŋĿ
–! t·ĽžƎŪůŤĻĭĹŋĶĠĹŋŎĠ
–! žƎŪŨĻĿúĠ
ĴħĬŖĿžƎŠƊƄ
ĥ?ľX×ĬŒĹĠő
–! POSIXスレッド pthread
スレッド
11
POSIXスレッドによるプログラミング
•! ŨƍůųĿ±v
Pthread, Solaris thread
•! ƌƒžĿ{iû)Ŀ)0
•! ïĭ>ŔıĿ?”
int s; /* global */
int n_thd; /* number of threads */
int thd_main(int id)
{ int c,b,e,i,ss;
c=1000/n_thd;
b=c*id;
e=s+c;
ss=0;
for(i=b; i<e; i++) ss += a[i];
pthread_lock();
s += ss;
pthread_unlock();
return s;
}
for(t=1;t<n_thd;t++){
r=pthread_create(thd_main,t)
}
thd_main(0);
for(t=1; t<n_thd;t++)
pthread_join();
ŨƍůųƝ
žƎŠƊƄX×Ŀ§Œ
12
OpenMPによるプログラミング
ĪŒĵĩĺĚOK!
#pragma omp parallel for reduction(+:s)
for(i=0; i<1000;i++) s+= a[i];
13
OpenMPとは
•! "“ƅƆƋƂƌŮžƎŪůŤĿ*žƎŠƊƃƐŠĿĴŊĿ
žƎŠƊƃƐŠƆűƌ
–! ƀƒŨàè(Fortran/C/C++)ŕdirectiveƔ}»†ƕĺ*žƎŠƊƃƐ
ŠĺĦőŎġľ|g
•! ÇFţƐŹřƊĂĿISVŕoľ›ŕ£W
–! Oct. 1997 Fortran ver.1.0 API
–! Oct. 1998 C/C++ ver.1.0 API
–! ®GĚOpenMP 3.0
•! URL
–! http://www.openmp.org/
14
OpenMPĿя
•! "“ƅƆƋƂƌŮžƎŪůŤŦŨŰƄĿŽ8
–! ijĭĹĚĠňŌęƂƌŮţŗƑžƎŪůŤĥ§ľƓ
•! "“ƅƆƋƂƌŮžƎŪůŤŦŨŰƄĿ*5}»†Ŀ"õ5ĿpÜr
–! =¼ĺ*5}»†ĥ´ĽŐĚ¾˜rĥĽĠě
•! OpenMPĿ}»†ŀ*X×ƆűƌŅĿƠƤƢŕ€
–! l–Ŀ}»†ŀ*5ţƐŹřƊĿĴŊĿźƐŲŕĢőŋĿ
•! ½VyØáÃĥĽŬƒŢůŲƔĪŒňĺƕ
–! *rĥčĠ
–! ţƒųĿ5%ĥ95%ĿX׍āŕ7Ŋő(?)ęę5%ŕÆ6ľ*5įő
•! "“ƅƆƋƂƌŮžƎŪůŤŦŨŰƄĥŬƒŢůŲ
–! small-scale(Ƨ16žƎŪůŤƕĤŏmedium-scale (Ƨ64žƎŪůŤƕŕ[ë
–! l–ŀƂƌŮŨƍůųžƎŠƊƃƐŠ
15
•! pthreadŀOS-oriented, general-purpose
OpenMPĿAPI
•! ‡ĭĠàèĺŀĽĠƓ
–! ţƐŹřƊ}»†Ɣdirectives/pragmaƕĚƊřŽƊƋĚ°MP„ľŎŐ
ƀƒŨàèŕ|g
–! ƀƒŨàèƛFortran77, f90, C, C++
•! Fortranƛď!$OMPĤŏTňő}»×
•! C: #pragma omp Ŀpragma}»×
•! Ó4*5ĺŀĽĠƓ
–! *X×ƨ?”ŕžƎŠƊƂĥ‹»
•! }»†ŕ©ÞįőĪĻľŎŐĚôžĺX×;
–! incrementalľ*5
–! žƎŠƊƄĀ¶ĚűŸůşĿĈĤŏX²·
–! ôžªĻ*ªŕ?ĮūƒŨĺįĥĺĦő
16
OpenMPĿX×Ɔűƌ
•! ôžX×ĤŏTňő
•! Fork-joinƆűƌ
•! parallel region
–! ÄCŃ(ĭŋüÛX×
… A ...
#pragma omp parallel
{
foo(); /* ..B... */
}
… C ….
#pragma omp parallel
{
…D…
}
… E ...
fork
A
Call foo() Call foo() Call foo() Call foo()
B
join
C
D
E
17
Parallel Region
•! ۄĿŨƍůų(team)ľŎķĹĚ*X×ĬŒőû)
–! Parallelš†ĺ}W
–! ?ĮParallel regionŕX×įőŨƍůųŕteamĻCń
–! region$ŕteam$ĿŨƍůųĺüÛX×
•! ÄCŃ(ĭŋüÛX×
Fortran:
!$OMP PARALLEL
…
… parallel region
...
!$OMP END PARALLEL
C:
#pragma omp parallel
{
...
... Parallel region...
...
}
18
Æ6ĽűƆ
•!
•!
•!
•!
žƎŪůŤĿºçď/proc/cpuinfo
gcc -fopenmp, gccは、4.2からサポート, gfortran
Æ6ĽžƎŠƊƄ
žƎŪůŤ„ŀ°MP„OMP_NUM_THREADSĺ.m
#include <omp.h>
#include <stdio.h>
/*
 * Minimal OpenMP smoke test: prints the maximum thread count, then lets
 * every thread of a parallel region print its own number and the team size.
 * Returns 0.
 */
int main(void)
{
    printf("omp-test ... n_thread=%d\n", omp_get_max_threads());

    /* The block below is executed once by each thread of the team. */
#pragma omp parallel
    {
        printf("thread (%d/%d)...\n",
               omp_get_thread_num(), omp_get_num_threads());
    }

    printf("end...\n");
    return 0;
}
Work sharingš†
•! Team$ĿŨƍůųĺ){ĭĹX×įőû)ŕ}W
–! parallel region$ĺ²Ġő
–! for š†
•! řŬƍƒŦƉƐŕ){ĭĹX×
•! űƒŬ*
–! sectionsš†
•! =ŪşŦƉƐŕ){ĭĹX×
•! ŬŨş*
–! singleš†
•! ĸĿŨƍůųĿʼnĥX×
–! parallel š†ĻÌʼn>ŔıĴâ¤
•! parallel for š†
•! parallel sectionsš†
thread1
thread2
thread3
Duplicated execution
directives
work-sharing, sync
20
Forš†
•! ForƌƒžƔDOƌƒžƕĿřŬƍƒŦƉƐŕ*X×
•! }»†Ŀ¸kĿforƌƒžŀcanonical shapeĺĽħĹŀĽŏĽĠ
#pragma omp for [clause…]
for(var=lb; var logical-op ub; incr-expr)
body
–! varŀ…„IĿƌƒžP„Ɣh.·ľprivateƕ
–! incr-expr
•! ++var,var++,--var,var--,var+=incr,var-=incr
–! logical-op
•! <, <=, >, >=
–! ƌƒžĿQĿČŃ(ĭŀĽĭĚbreakŋĽĭ
–! clauseĺ*ƌƒžĿŨšŧƈƒƋƐŠĚűƒŬ]rŕ}W
21
ƛ×*ƀşŲƌÀ
22
ĵĠĴĠĚrÒŀĪġĽő
23
ƛµ
µ×*ƀşŲƌÀƌƒŮƐ
/*
 * Sparse matrix-vector product y = A*x for a matrix stored row by row.
 *
 *   a         - non-zero values, the rows stored one after another
 *   row_start - row i occupies a[row_start[i]] .. a[row_start[i+1]-1],
 *               so this array is read at indices 0..n
 *   col_idx   - column index belonging to each entry of a
 *   x         - input vector, read at x[col_idx[j]]
 *   y         - output vector; y[0..n-1] is overwritten
 *   n         - number of rows
 *
 * Each row writes only its own y[i], so the outer loop is shared among the
 * threads; the per-row temporaries are listed as private.
 */
void Matvec(const double a[], const int row_start[], const int col_idx[],
            const double x[], double y[], int n)
{
    int i, j, start, end;
    double t;

#pragma omp parallel for private(j,t,start,end)
    for (i = 0; i < n; i++) {
        start = row_start[i];
        end = row_start[i + 1];
        t = 0.0;
        for (j = start; j < end; j++) {
            t += a[j] * x[col_idx[j]];
        }
        y[i] = t;
    }
}
a
y
X
24
*ƌƒžĿŨšŧƈƒƋƐŠ
•! žƎŪůŤ„ƙĿL>
ôž
n
Iteration space
schedule(static,n)
schedule(static)
schedule(dynamic,n)
schedule(guided,n)
ļĿŎġĽĻĦľĠ)ĩŕįőĿĤŕÐĢĹʼnňĭōġě
25
Data scope]r}W
•! parallelš†Ěwork sharingš†ĺ}»Åĺ}W
•! shared(var_list)
–! š†$ĺ}WĬŒĴP„ĥŨƍůųāĺ"“ĬŒő
•! private(var_list)
–! š†$ĺ}WĬŒĴP„ĥprivate
•! firstprivate(var_list)
–! privateĻ?›ĺğőĥ̸/Ŀĺ+”5ĬŒő
•! lastprivate(var_list)
–! privateĻ?›ĺğőĥ̚†ĥˍľôžX×ĬŒĴL>Ŀ’kĿ
ŕ9Œįő
•! reduction(op:var_list)
–! reductionŗşŪŨŕįőĪĻŕ}WĚŨŝƊP„Ŀʼn
–! X×ŀprivate̚†Ëkľ9Œ
26
Data Race
OpenMP
ŀ"“ƅƆƋ
Ɔűƌ
Data RaceƔűƒŬƍƒŨƕď=
ۄĿŨƍůųĥ?Į"“P
„ŕ?ľ‘ĦĢő
27
*5ĺĦĽĠƌƒž
28
Barrier }Ƞ
•! ŸƋŗ?”ŕ×ġ
–! ŮƒƄ$ĿŨƍůųĥ?”¨ľùįőňĺĚjĸ
–! ijŒňĺĿƅƆƋ‘Ħòʼnŋflushįő
–! *ƋƒŧƉƐĿËŔŐĚwork sharingš†ĺnowait}
»Åĥ}WĬŒĽĠăŐ̐ϷľŸƋŗ?”ĥ×ŔŒőě
#pragma omp barrier
29
ŸƋŗŀĪġĠġS
õaĿforš†ŀĚimplicitľŸƋŗĥĻŏŒĹĠőĴŊľĚ
¬,ľŸƋŗŕĠŒőpÜŀĽĠ
30
nowaitĿĠˆ
31
ijĿĚüÜĽ}»†
•! singleš†ƛęƘĸĿŨƍůųĵĩĺX×įőû)ŕ
}W
•! masterš†ƛƂŨŬƑŨƍůųĵĩĺX×įőû)ŕ
}W
•! sectionš†ƛ,ĜĿžƎŠƊƄX×ŕŨƍůųŕŨ
ƍůųľ0ŐiĹő
•! criticalš†ƛ~ĊJƔ?ľX×ĺĦĽĠû)ƕ
ŕ}W
•! flushš†
•! threadprivateš†
32
OpenMPĻMPIĿžƎŠƊƄƛcpi
•! À)ĭĹĚ%B­ŕ¢ŊőžƎŠƊƄ
•! MPICHĿŰŨŲžƎŠƊƄ
•! OpenMPª
–! ƌƒžŕ*5įőĵĩ, 1×Ŀʼn
•! MPIª(cpi-mpi.c)
–! !1ĬŒĴP„nĿŕBcast
–! ’kľreduction
–! áÃŀĚžƎŪůŤīĻľČŃČŃľŌķĹĠő
33
#include <stdio.h>
#include <math.h>
/* Integrand of the pi approximation: returns 4 / (1 + a*a). */
double f( double );
double f( double a )
{
    const double denominator = 1.0 + a * a;

    return 4.0 / denominator;
}
OpenMPª
/*
 * Approximates pi with the midpoint rule: the interval [0,1] is cut into n
 * strips of width h = 1/n and f is summed at the strip midpoints.
 * n is read from standard input.
 *
 * Returns 0 on success, 1 when no positive strip count could be read
 * (n <= 0 would otherwise divide by zero or produce a meaningless result).
 */
int main( int argc, char *argv[])
{
    int n, i;
    double PI25DT = 3.141592653589793238462643;  /* reference value for the error report */
    double pi, h, sum, x;

    (void)argc;
    (void)argv;

    if (scanf("%d", &n) != 1 || n <= 0) {
        fprintf(stderr, "expected a positive number of intervals\n");
        return 1;
    }

    h = 1.0 / (double) n;
    sum = 0.0;
    /* x is a per-iteration temporary; the partial sums are combined by the reduction. */
#pragma omp parallel for private(x) reduction(+:sum)
    for (i = 1; i <= n; i++) {
        x = h * ((double)i - 0.5);
        sum += f(x);
    }
    pi = h * sum;
    printf("pi is approximately %.16f, Error is %.16f\n",
           pi, fabs(pi - PI25DT));
    return 0;
}
34
OpenMPĿžƎŠƊƄƛlaplace
•! Laplaceˆ¿eĿĄ·ß¤
–! ^<Ŀ4¨ĿcHĺĚupdateĭĹĠħžƎŠƊƄ
–! OldĻnewŕ²uĭŸ/ĿŕţŻƒ
–! #I·ĽĊJ)0
–! ’kľŸ_ŕĻő
•! OpenMPªďlap.c
–! 3ĸĿƌƒžŕQĺ*5
•! OpenMPŀƘž Ŀʼn
–! Parallel}»†Ļfor}»†ŕĆĭĹĸĤķĹʼnĴ
•! MPIª
–! ͚ĴĠŅŖ
35
/*
* Laplace equation with explicit method
*/
#include <stdio.h>
#include <math.h>
/* square region */
#define XSIZE 1000
#define YSIZE 1000
#define PI 3.1415927
#define NITER 100
double u[XSIZE+2][YSIZE+2],uu[XSIZE+2][YSIZE+2];
double time1,time2;
double second();
void initialize();
void lap_solve();
/*
 * Driver: initialises the grid, runs the solver and prints the difference
 * between the two second() readings taken around lap_solve().
 */
int main(void)
{
    initialize();
    time1 = second();
    lap_solve();
    time2 = second();
    printf("time=%g\n", time2 - time1);
    return 0;  /* returning from main has the same effect as exit(0) */
}
36
/*
 * Runs NITER sweeps over the interior points 1..XSIZE x 1..YSIZE of the
 * global grids. Each sweep first copies u into uu, then rewrites every
 * u[i][j] as the average of its four neighbours in uu. Afterwards the sum
 * of (uu - u) over the interior is printed as a check value.
 */
void lap_solve()
{
    int i, j, iter;
    double check;

    /* One team is kept for all sweeps; only the two loops below are work-shared. */
#pragma omp parallel private(iter,i,j)
    {
        for (iter = 0; iter < NITER; iter++) {
            /* keep the previous values in uu */
#pragma omp for
            for (i = 1; i <= XSIZE; i++) {
                for (j = 1; j <= YSIZE; j++) {
                    uu[i][j] = u[i][j];
                }
            }

            /* new value = mean of the four saved neighbours */
#pragma omp for
            for (i = 1; i <= XSIZE; i++) {
                for (j = 1; j <= YSIZE; j++) {
                    u[i][j] = (uu[i-1][j] + uu[i+1][j] + uu[i][j-1] + uu[i][j+1])/4.0;
                }
            }
        }
    }

    /* check value: accumulated difference between the last two grids */
    check = 0.0;
#pragma omp parallel for private(j) reduction(+:check)
    for (i = 1; i <= XSIZE; i++) {
        for (j = 1; j <= YSIZE; j++) {
            check += (uu[i][j]-u[i][j]);
        }
    }
    printf("sum = %g\n", check);
}
37
/*
 * Fills the interior of u with sin((x-1)/XSIZE*PI) + cos((y-1)/YSIZE*PI)
 * and sets the outermost rows and columns of both u and uu to zero.
 */
void initialize()
{
    int x, y;

    /* initialize the interior points */
    for (x = 1; x <= XSIZE; x++) {
        for (y = 1; y <= YSIZE; y++) {
            u[x][y] = sin((double)(x-1)/XSIZE*PI) + cos((double)(y-1)/YSIZE*PI);
        }
    }

    /* zero the first and last column of every row */
    for (x = 0; x < (XSIZE+2); x++) {
        u[x][0] = 0.0;
        u[x][YSIZE+1] = 0.0;
        uu[x][0] = 0.0;
        uu[x][YSIZE+1] = 0.0;
    }

    /* zero the first and last row of every column */
    for (y = 0; y < (YSIZE+2); y++) {
        u[0][y] = 0.0;
        u[XSIZE+1][y] = 0.0;
        uu[0][y] = 0.0;
        uu[XSIZE+1][y] = 0.0;
    }
}
38
ĺŀĚrÒŀƟ
•! žƊůŲżśƒƄĚEċݜľŎő
•! ¬ľĚEċݜŀüÜ
–! *5ĿŜƒŸƒſůųĻ*5ĿgainĻĿŲ
ƍƒųŜż
•! WebĺĭĹʼnĹħĵĬĠě
•! IJłĚÓ)ĺŌķĹĚʼnĹħĵĬĠě
39
LaplaceĿrÒ AMD Opteron quad , 2 socket
XSIZE=YSIZE=1000
XSIZE=YSIZE=8000
X׍ā
X׍ā
[ôžrÒ¡
[ôžrÒ¡
40
LaplaceĿrÒ
Core i7 920 @ 2.67GHz, 2 socket
XSIZE=YSIZE=8000
XSIZE=YSIZE=1000
X׍ā
X׍ā
[ôžrÒ¡
[ôžrÒ¡
41
•! OpenMPŀŨšƒƌĭĽĠƟ
42
CC-NUMAĻfirst touch
43
First touchŕįőĻ
2 socket Nehalem
44
Advanced topics
•! OpenMP 3.0
–! 2007dľapproveĬŒĴ
•! MPI/OpenMP Hybrid Programming
–! SMPşƊŨŬĺĿžƎŠƊƃƐŠ
45
OpenMP3.0ĺó2ĬŒĴ¨
Openmp.orgľZOõäĿ‰•èŸƒŧƉƐĿ›‘ĥğő
•! ŬŨşĿ™qĥó2ĬŒĴ
–! Parallel š†ĻTaskš†ĺ±vĬŒőŨƍůųĿX
–! taskš†
–! taskwaitš†
•! ƅƆƋƆűƌĿ‹º5
–! FlushĿxĠ
•! ŵŨŲĬŒĴL>ĿWÏĿ‹º5
–! Collapse}»Å
•! ŨƍůųĿŨŬůşŤřũĿ}W
•! ơƖƖĺĿprivateP„ľ[įőconstructor, destructorĿ
xĠ
46
Taskš†Ŀ
Qľparallel š†ĥpÜ
47
Stephen Olivier, Jan Prins,
Evaluating OpenMP 3.0 Run Time
Systems on Unbalanced Task
Graphs, presented in IWOMP 2009
48
Stephen Olivier, Jan Prins, Evaluating
OpenMP 3.0 Run Time Systems on
Unbalanced Task Graphs, presented
in IWOMP 2009
49
StackľĸĠĹ
=ŨƍůųĿŨŬůşŤřũŀĚ
°MP„OMP_STACKSIZE
ĺãWĺĦőě
50
SMPşƊŨŬƑƂƌŮţŗşƊŨŬ
•! PC-based SMPşƊŨŬ
–! ƂƌŮţŗ
•! Middle scale ServerĿşƊŨŬ
–! ASCI Blue Mountain, O2K
–! T2K Open Supercomputer
•! vector supercomputerĿşƊŨŬ
–! Hitachi SR11000
–! SX-6, 7, 8?
črÒáÃŤƒŸƔSMP)Ě
Ě
ƀşŬžƎŪůŤĿčö5
črÒáÃŤƒŸĿ
ŵůŲƏƒşÍ>
şƊŨŬĿŶƒųĿčö5
ƂƌŮţŗ5
şƊŨŬĿŶƒųĿSMP5
5
*ŦŨŰƄŀĠİŒŀ
ʼnŖĽSMPş
şƊŨŬľĽőƓ
XąľĽķĹĠőƓƓƓ
51
MPIとOpenMPのHybridプログラミング
•! )ƒƅƆƋŀĚMPIĺĚĿSMPŀOpenMPĺ
•! MPI+OpenMP
–! ŀĮŊľĚMPIĿžƎŠƊƄŕő
–! *ľĺĦőƌƒžŕ*X×}»†ŕ!Œő
•! *û)ŀSMPĺ*ľX×ĬŒőě
•! OpenMP+MPI
–! OpenMPľŎőƂƌŮŨƍůųžƎŠƊƄ
–! singleš†Ƒmasterš†Ƒcriticalš†$ĺĚƅůŪƒŧõŕ×ġě
•! thread-SafeĽMPIĥpÜ
•! ĠħĸĤĿ¨ĺĚ4ĿWÏĥ‹Ľ¨ĥğő
–! ƂƌŮŨƍůų°MĺĿMPI
–! OpenMPĿthreadprivateP„ĿWÏƟ
•! SMP$ĺűƒŬŕ"²įőĪĻĥĺĦőĻĦľ3—ĥğőě
–! ĤĽŏİĭŋijġĽŏĽĠĪĻĥğőƔƅƆƋŸŨYýĿEċƟƕ
52
Thread-safety of MPI
•! MPI_THREAD_SINGLE
–! A process has only one thread of execution.
•! MPI_THREAD_FUNNELED
–! A process may be multithreaded, but only the thread that initialized MPI
can make MPI calls.
•! MPI_THREAD_SERIALIZED
–! A process may be multithreaded, but only one thread at a time can
make MPI calls.
•! MPI_THREAD_MULTIPLE
–! A process may be multithreaded and multiple threads can call MPI
functions simultaneously.
•! MPI_Init_thread ĺ}WĚŤƁƒŲĬŒĹĠĽĠ;Òrŋğő
53
SMPとマルチコア
•! pİĭŋĚHybridžƎŠƊƄŀöħĽĤķĴ
–! ėflat-MPI”ƔSMPĿĺŋƣƤƢƕĥŠĠ
–! -¨
•! űƒŬĥ"“ĺĦőĘƅƆƋŕÅÈ
•! úġƍƀƌĿ*rŕfĦ(į
•! ĭĤĭĚƂƌŮţŗşƊŨŬĺŀHybridĥpÜĽšƒŨĥ(Ĺħő
–! ŞƇůŦƈĥ"“ĬŒő
マルチコア
SMP
CPU
CPU
CPU
CPU
キャッシュキャッシュキャッシュキャッシュ
メモリ
CPU
CPU
CPU
CPU
キャッシュ
メモリ
54
RS-DFT on T2K Ŀ
ñÎDUĚÖ
ĚSݜďSMP şƊŨŬľģĩőďOpenMP/MPI ŷřŽƋůųďNPB ƗďRSDFT
ĿåĚsK'¯V¹ÁKA2009-HPC-119Ěpp. 163-168, 2009.
X׍āď(sec)
180
ēĕĖţŗ
SD
160
ĒđēĔţŗ
SD
įņĹĿŤŽƌƒŮƐĺ
CG
4OMP/MPIĥŋķĻŋčö
140
CG
etc
120
100 RotV
GS
PC
80
60
GS
pzheedv
40
20
MatE
flat MPI
4 OMP/MPI
16 OMP/MPI
flat MPI
4 OMP/MPI
16 OMP/MPI
flat MPI
4 OMP/MPI
16 OMP/MPI
flat MPI
4 OMP/MPI
16 OMP/MPI
flat MPI
4 OMP/MPI
16 OMP/MPI
flat MPI
4 OMP/MPI
16 OMP/MPI
0
HPSI
hpsi
55
55
ģŔŐľ
•! ĪŒĤŏĿčö5ľŀĚ*5ŀpĉ
•! ƘƚžƎŪůŤĨŏĠĺŎĩŒŁĚOpenMP
•! ƂƌŮţŗžƎŪůŤĺŀĚpĉ
•! ƘƚžƎŪůŤľĽŒŁĚMPIĥpĉ
–! ĵĵĭĚžƎŠƊƃƐŠĿţŨŲĻX׍āĿŲƍƒųŜżĤ
–! þ”·ľŀĚMPIľPŔőžƎŠƊƃƐŠàèĥjĴŒő
•! ½VyØáÃĿ*5ŀijŒŇļćĭħĽĠ
–! $Gįő*rĥğő
–! SĿŹŬƒƐĥ£ňķĹĠő
–! *žƎŠƊƄĿĝűťřƐŹŬƒƐĞ
rÒŋ!
56
êċ
•! ŴůžŤůşEċŕßħ*žƎŠƊƄŕOpenMPŕ
²ĠĹvĭĽĬĠě
–! ŴůžŤůşEċĻŀĚĠħĸĤĿÕ«ŕÙľ’SĿ ľĽőŎġľÙľæŊőÌʼn>Ŕıŕ¢ŊőEċ
–! NĿÕ«ĥğŐĚĜĿÕ«ĿüĬŕwiĚ ŕpiĻį
őěÙ(knapsack)ľŀ’SWĿüĬňĺĠŒőĪĻĥĺĦ
őěĪĿĻĦĚÙľĠŒőĪĻĥĺĦőÕ«ĿÌʼn>Ŕı
ŕ¢ŊĚijĿĻĦĿ ŕ¢ŊĽĬĠě
–! ¢ŊőĿŀ̒SĿ ĵĩĺŎĠěƔÌʼn>Ŕıŀ¢
ŊĽħĹŋŎĠƕ
–! ¦uƛTaskš†ŀŔĽĠĪĻ
–! źƐŲƛbÉľįőě
57
#define MAX_N 100   /* capacity of the W and P tables */

int N;              /* number of items; bounds the item index in knap_search */
int Cap;            /* knapsack capacity, passed to the search as the initial room */
int W[MAX_N];       /* W[i]: weight of item i */
int P[MAX_N];       /* P[i]: value of item i, added to the running total when taken */
/*
 * Reads the problem from "test.dat", runs the search starting at item 0
 * with nothing taken and the full capacity, and prints the result.
 */
int main(void)
{
    int opt;

    read_data_file("test.dat");
    opt = knap_search(0, 0, Cap);
    printf("opt=%d\n", opt);
    return 0;  /* returning from main has the same effect as exit(0) */
}
read_data_file(file)
char *file;
{
FILE *fp;
int i;
}
fp = fopen(file,"r");
fscanf(fp,"%d",&N);
fscanf(fp,"%d",&Cap);
for(i = 0; i < N; i++)
fscanf(fp,"%d",&W[i]);
for(i = 0; i < N; i++)
fscanf(fp,"%d",&P[i]);
fclose(fp);
58
ôž&`ª
int knap_search(int i,int cp, int M)
{
int Opt;
int l,r;
}
if (i < N && M > 0){
if(M >= W[i]){
l = knap_seach(i+1,cp+P[i],M-W[i]);
r = knap_serach(i+1,cp,M);
if(l > r) Opt = l;
else Opt = r;
} else
Opt = knap_search(i+1,cp,M);
} else Opt = cp;
return(Opt);
59
Fly UP