Actual source code: tcpthread.c
petsc-3.4.2 2013-07-02
/* Define feature test macros to make sure CPU_SET and other functions are available
 */
#define PETSC_DESIRE_FEATURE_TEST_MACROS

#include <../src/sys/threadcomm/impls/pthread/tcpthreadimpl.h>

#if defined(PETSC_PTHREAD_LOCAL)
PETSC_PTHREAD_LOCAL PetscInt PetscPThreadRank;
#else
pthread_key_t PetscPThreadRankkey;
#endif

static PetscBool PetscPThreadCommInitializeCalled = PETSC_FALSE;

const char *const PetscPThreadCommSynchronizationTypes[] = {"LOCKFREE","PetscPThreadCommSynchronizationType","PTHREADSYNC_",0};
const char *const PetscPThreadCommAffinityPolicyTypes[]  = {"ALL","ONECORE","NONE","PetscPThreadCommAffinityPolicyType","PTHREADAFFPOLICY_",0};
const char *const PetscPThreadCommPoolSparkTypes[]       = {"SELF","PetscPThreadCommPoolSparkType","PTHREADPOOLSPARK_",0};

static PetscInt ptcommcrtct = 0; /* PThread communicator creation count. Incremented whenever a pthread
                                    communicator is created and decremented when it is destroyed. On the
                                    last pthread communicator destruction, the thread pool is also terminated */
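
/* Returns the rank of the calling thread. When compiler-supported thread-local storage
   (PETSC_PTHREAD_LOCAL) is available the rank is read directly from the thread-local
   variable PetscPThreadRank; otherwise it is looked up through the pthread key
   PetscPThreadRankkey. */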
PetscErrorCode PetscThreadCommGetRank_PThread(PetscInt *trank)
{
#if defined(PETSC_PTHREAD_LOCAL)
  *trank = PetscPThreadRank;
#else
  *trank = *((PetscInt*)pthread_getspecific(PetscPThreadRankkey));
#endif
  return 0;
}
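
/* The affinity policy (ptcomm->aff) controls how worker threads are bound to cores:
     PTHREADAFFPOLICY_ONECORE - pin each thread to the single core tcomm->affinities[i] (mod ncores)
     PTHREADAFFPOLICY_ALL     - allow each thread to run on any available core
     PTHREADAFFPOLICY_NONE    - set no affinity; placement is left to the OS scheduler */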
/* Sets the attributes for threads */
PetscErrorCode PetscThreadCommSetPThreadAttributes(PetscThreadComm tcomm)
{
  PetscErrorCode          ierr;
  PetscThreadComm_PThread ptcomm = (PetscThreadComm_PThread)tcomm->data;
  pthread_attr_t          *attr  = ptcomm->attr;
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
  PetscInt                ncores;
  cpu_set_t               *cpuset;
#endif
  PetscInt                i;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
  ierr           = PetscMalloc(tcomm->nworkThreads*sizeof(cpu_set_t),&cpuset);CHKERRQ(ierr);
  ptcomm->cpuset = cpuset;
  ierr           = PetscGetNCores(&ncores);CHKERRQ(ierr);
#endif

  for (i=ptcomm->thread_num_start; i < tcomm->nworkThreads; i++) {
    pthread_attr_init(&attr[i]);
    /* CPU affinity */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
    PetscInt j;
    switch (ptcomm->aff) {
    case PTHREADAFFPOLICY_ONECORE:
      /* Pin thread i to a single core, wrapping the requested core id into the available range */
      CPU_ZERO(&cpuset[i]);
      CPU_SET(tcomm->affinities[i]%ncores,&cpuset[i]);
      pthread_attr_setaffinity_np(&attr[i],sizeof(cpu_set_t),&cpuset[i]);
      break;
    case PTHREADAFFPOLICY_ALL:
      /* Allow thread i to run on every core */
      CPU_ZERO(&cpuset[i]);
      for (j=0; j<ncores; j++) CPU_SET(j,&cpuset[i]);
      pthread_attr_setaffinity_np(&attr[i],sizeof(cpu_set_t),&cpuset[i]);
      break;
    case PTHREADAFFPOLICY_NONE:
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}
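
/* Destroys a pthread communicator. The creation count ptcommcrtct is decremented; when the
   last pthread communicator is destroyed the thread pool is finalized and the thread ids,
   affinity sets, and attributes are freed. */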
PetscErrorCode PetscThreadCommDestroy_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm = (PetscThreadComm_PThread)tcomm->data;
  PetscErrorCode          ierr;

  PetscFunctionBegin;
  if (!ptcomm) PetscFunctionReturn(0);
  ptcommcrtct--;
  if (!ptcommcrtct) {
    /* Terminate the thread pool */
    ierr = (*ptcomm->finalize)(tcomm);CHKERRQ(ierr);
    ierr = PetscFree(ptcomm->tid);CHKERRQ(ierr);
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
    ierr = PetscFree(ptcomm->cpuset);CHKERRQ(ierr);
#endif
    ierr = PetscFree(ptcomm->attr);CHKERRQ(ierr);
    PetscPThreadCommInitializeCalled = PETSC_FALSE;
  }
  ierr = PetscFree(ptcomm->granks);CHKERRQ(ierr);
  ierr = PetscFree(ptcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
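
/* Creates a pthread-based thread communicator and attaches it to tcomm. The first call
   (for PETSC_THREAD_COMM_WORLD) processes the -threadcomm_pthread_* options, selects the
   synchronization scheme, pins the main thread if it is a worker, and starts the thread
   pool. Subsequent calls create sub-communicators that reuse the global pool and only map
   their affinities onto the global thread ranks.

   Usage sketch (the -threadcomm_type and -threadcomm_nthreads options are assumed to be
   handled elsewhere in the threadcomm package):
       ./ex1 -threadcomm_type pthread -threadcomm_nthreads 4 \
             -threadcomm_pthread_affpolicy onecore -threadcomm_pthread_main_is_worker true */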
PETSC_EXTERN PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm;
  PetscErrorCode          ierr;
  PetscInt                i;

  PetscFunctionBegin;
  ptcommcrtct++;
  ierr = PetscStrcpy(tcomm->type,PTHREAD);CHKERRQ(ierr);
  ierr = PetscNew(struct _p_PetscThreadComm_PThread,&ptcomm);CHKERRQ(ierr);

  tcomm->data              = (void*)ptcomm;
  ptcomm->nthreads         = 0;
  ptcomm->sync             = PTHREADSYNC_LOCKFREE;
  ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
  ptcomm->spark            = PTHREADPOOLSPARK_SELF;
  ptcomm->ismainworker     = PETSC_TRUE;
  ptcomm->synchronizeafter = PETSC_TRUE;
  tcomm->ops->destroy      = PetscThreadCommDestroy_PThread;
  tcomm->ops->runkernel    = PetscThreadCommRunKernel_PThread_LockFree;
  tcomm->ops->barrier      = PetscThreadCommBarrier_PThread_LockFree;
  tcomm->ops->getrank      = PetscThreadCommGetRank_PThread;

  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->granks);CHKERRQ(ierr);

  if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
    PetscBool flg1,flg2,flg3,flg4;

    PetscPThreadCommInitializeCalled = PETSC_TRUE;

    ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"PThread communicator options",NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
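
    /* When the main thread is also a worker it takes rank 0 and only nworkThreads-1 additional
       pthreads are spawned (thread_num_start = 1); otherwise every one of the nworkThreads
       ranks gets its own pthread. */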
    if (ptcomm->ismainworker) {
      ptcomm->nthreads         = tcomm->nworkThreads-1;
      ptcomm->thread_num_start = 1;
    } else {
      ptcomm->nthreads         = tcomm->nworkThreads;
      ptcomm->thread_num_start = 0;
    }

    switch (ptcomm->sync) {
    case PTHREADSYNC_LOCKFREE:
      ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
      ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
      tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
      tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only Lock-free synchronization scheme supported currently");
    }
    /* Set up thread ranks */
    for (i=0; i < tcomm->nworkThreads; i++) ptcomm->granks[i] = i;
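
    /* Record the main thread's rank (0) so that PetscThreadCommGetRank_PThread also works
       when called from the main thread. */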
    if (ptcomm->ismainworker) {
#if defined(PETSC_PTHREAD_LOCAL)
      PetscPThreadRank = 0; /* Main thread rank */
#else
      pthread_key_create(&PetscPThreadRankkey,NULL);
      pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);
#endif
    }
    /* Set the leader thread rank */
    if (ptcomm->nthreads) {
      if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
      else tcomm->leader = ptcomm->granks[0];
    }

    /* Create array holding pthread ids */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_t),&ptcomm->tid);CHKERRQ(ierr);
    /* Create thread attributes */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_attr_t),&ptcomm->attr);CHKERRQ(ierr);
    ierr = PetscThreadCommSetPThreadAttributes(tcomm);CHKERRQ(ierr);
    if (ptcomm->ismainworker) {
      /* Pin main thread */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
      cpu_set_t mset;
      PetscInt  ncores,icorr;

      ierr  = PetscGetNCores(&ncores);CHKERRQ(ierr);
      CPU_ZERO(&mset);
      icorr = tcomm->affinities[0]%ncores;
      CPU_SET(icorr,&mset);
      sched_setaffinity(0,sizeof(cpu_set_t),&mset);
#endif
    }
    /* Initialize thread pool */
    ierr = (*ptcomm->initialize)(tcomm);CHKERRQ(ierr);
  } else {
    /* Not the world communicator: reuse the already-running global thread pool and map this
       communicator's affinities onto the global thread ranks */
    PetscThreadComm         gtcomm;
    PetscThreadComm_PThread gptcomm;
    PetscInt                *granks,j,*gaffinities;

    ierr        = PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);CHKERRQ(ierr);
    gaffinities = gtcomm->affinities;
    gptcomm     = (PetscThreadComm_PThread)gtcomm->data;
    granks      = gptcomm->granks;
    /* Copy over the data from the global thread communicator structure */
    ptcomm->ismainworker     = gptcomm->ismainworker;
    ptcomm->thread_num_start = gptcomm->thread_num_start;
    ptcomm->sync             = gptcomm->sync;
    ptcomm->aff              = gptcomm->aff;
    tcomm->ops->runkernel    = gtcomm->ops->runkernel;
    tcomm->ops->barrier      = gtcomm->ops->barrier;

    /* Each thread in this communicator takes the global rank of the thread with the same core affinity */
    for (i=0; i < tcomm->nworkThreads; i++) {
      for (j=0; j < gtcomm->nworkThreads; j++) {
        if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
      }
    }
  }
  PetscFunctionReturn(0);
}