Actual source code: tclockfree.c

petsc-3.4.2 2013-07-02
#include <../src/sys/threadcomm/impls/pthread/tcpthreadimpl.h>

#define THREAD_TERMINATE      0
#define THREAD_INITIALIZED    1
#define THREAD_CREATED        0
#if defined(PETSC_HAVE_MALLOC_H)
#include <malloc.h>
#endif

/* lock-free data structure */
typedef struct {
  PetscThreadCommJobCtx *data;
  PetscInt              *my_job_status;
} sjob_lockfree;

static sjob_lockfree job_lockfree = {NULL,NULL};
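
/*
   job_lockfree.data[i] holds the job currently assigned to thread i;
   job_lockfree.my_job_status[i] holds thread i's lifecycle state
   (THREAD_CREATED -> THREAD_INITIALIZED -> THREAD_TERMINATE).  Each thread
   spins only on its own slots, and each slot has a single writer at any
   stage, which is what lets the scheme get by without locks or atomics.
*/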

extern PetscThreadCommJobQueue PetscJobQueue;

/* Checks whether this thread is a member of tcomm */
PetscBool CheckThreadCommMembership(PetscInt myrank,PetscThreadComm tcomm)
{
  PetscInt                i;
  PetscThreadComm_PThread ptcomm;

  ptcomm = (PetscThreadComm_PThread)tcomm->data;

  for (i=0; i<tcomm->nworkThreads; i++) {
    if (myrank == ptcomm->granks[i]) return PETSC_TRUE;
  }
  return PETSC_FALSE;
}
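
/*
   Hand the given job to the threads of tcomm.  Only the
   PTHREADPOOLSPARK_SELF policy is handled here: each thread claims the job
   for itself (provided it is a member of tcomm) by storing the job pointer
   in its own slot and marking the job as received.
*/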

void SparkThreads_LockFree(PetscInt myrank,PetscThreadComm tcomm,PetscThreadCommJobCtx job)
{
  PetscThreadComm_PThread ptcomm;

  ptcomm = (PetscThreadComm_PThread)tcomm->data;

  switch (ptcomm->spark) {
  case PTHREADPOOLSPARK_SELF:
    if (CheckThreadCommMembership(myrank,tcomm)) {
      job_lockfree.data[myrank] = job;
      job->job_status[myrank]   = THREAD_JOB_RECIEVED;
    }
    break;
  }
}
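
/*
   Body of every spawned worker thread.  The worker keeps a private count of
   kernels it has processed (my_kernel_ctr) and compares it against the
   global count of kernels posted (PetscJobQueue->kernel_ctr); whenever it
   lags behind, it picks the next job out of the circular job queue (of
   nkernels entries), runs it, and publishes THREAD_JOB_COMPLETED.
   PetscReadOnce forces the flag and counter to be re-read from memory on
   every pass of the spin loop.
*/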

void *PetscPThreadCommFunc_LockFree(void *arg)
{
  PetscInt              my_job_counter = 0,my_kernel_ctr=0,glob_kernel_ctr;
  PetscThreadCommJobCtx job;

#if defined(PETSC_PTHREAD_LOCAL)
  PetscPThreadRank = *(PetscInt*)arg;
#else
  PetscInt PetscPThreadRank=*(PetscInt*)arg;
  pthread_setspecific(PetscPThreadRankkey,&PetscPThreadRank);
#endif

  job_lockfree.data[PetscPThreadRank]          = NULL;
  job_lockfree.my_job_status[PetscPThreadRank] = THREAD_INITIALIZED;

  /* Spin loop */
  while (PetscReadOnce(int,job_lockfree.my_job_status[PetscPThreadRank]) != THREAD_TERMINATE) {
    glob_kernel_ctr = PetscReadOnce(int,PetscJobQueue->kernel_ctr);
    if (my_kernel_ctr < glob_kernel_ctr) {
      job = &PetscJobQueue->jobs[my_job_counter];
      /* Spark the thread pool */
      SparkThreads_LockFree(PetscPThreadRank,job->tcomm,job);
      if (job->job_status[PetscPThreadRank] == THREAD_JOB_RECIEVED) {
        /* Do own job */
        PetscRunKernel(PetscPThreadRank,job_lockfree.data[PetscPThreadRank]->nargs,job_lockfree.data[PetscPThreadRank]);
        /* Post job completed status */
        job->job_status[PetscPThreadRank] = THREAD_JOB_COMPLETED;
      }
      my_job_counter = (my_job_counter+1)%job->tcomm->nkernels;
      my_kernel_ctr++;
    }
    PetscCPURelax();
  }

  return NULL;
}
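
/*
   Barrier: spin until every thread in tcomm has posted THREAD_JOB_COMPLETED
   for the job currently at PetscJobQueue->jobs[tcomm->job_ctr].  The loop
   sums the per-thread status words and exits once the sum is no longer
   positive, which assumes THREAD_JOB_COMPLETED is the only non-positive
   status a worker can hold at this point (inferred from the test below, not
   asserted anywhere).
*/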

PetscErrorCode PetscThreadCommBarrier_PThread_LockFree(PetscThreadComm tcomm)
{
  PetscInt                active_threads=0,i;
  PetscBool               wait          =PETSC_TRUE;
  PetscThreadComm_PThread ptcomm        =(PetscThreadComm_PThread)tcomm->data;
  PetscThreadCommJobCtx   job           =&PetscJobQueue->jobs[tcomm->job_ctr];
  PetscInt                job_status;

  PetscFunctionBegin;
  if (tcomm->nworkThreads == 1 && ptcomm->ismainworker) PetscFunctionReturn(0);

  /* Loop till all threads signal that they have done their job */
  while (wait) {
    for (i=0; i<tcomm->nworkThreads; i++) {
      /* PetscReadOnce forces the shared status to be re-read from memory on
         every pass instead of being hoisted out of the spin loop */
      job_status      = PetscReadOnce(int,job->job_status[ptcomm->granks[i]]);
      active_threads += job_status;
    }
    if (active_threads > 0) active_threads = 0;
    else wait = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
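
/*
   Allocate the lock-free slots and spawn the worker threads.  Each slot
   starts at THREAD_CREATED (0); a worker flips its own slot to
   THREAD_INITIALIZED (1) once it is up, and the spin loop below waits until
   every slot is nonzero.  Note that THREAD_TERMINATE shares the value 0
   with THREAD_CREATED; this is safe only because termination is never
   signalled until after this handshake has completed.
*/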

PetscErrorCode PetscPThreadCommInitialize_LockFree(PetscThreadComm tcomm)
{
  PetscErrorCode          ierr;
  PetscInt                i,threads_initialized;
  PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;

  PetscFunctionBegin;
  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscThreadCommJobCtx),&job_lockfree.data);CHKERRQ(ierr);
  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&job_lockfree.my_job_status);CHKERRQ(ierr);

  /* Create threads */
  for (i=ptcomm->thread_num_start; i < tcomm->nworkThreads; i++) {
    job_lockfree.my_job_status[i] = THREAD_CREATED;
    pthread_create(&ptcomm->tid[i],&ptcomm->attr[i],&PetscPThreadCommFunc_LockFree,&ptcomm->granks[i]);
  }

  if (ptcomm->ismainworker) job_lockfree.my_job_status[0] = THREAD_INITIALIZED;

  /* Wait till all threads have been initialized */
  threads_initialized = 0;
  while (threads_initialized != tcomm->nworkThreads) {
    threads_initialized = 0;
    for (i=0; i<tcomm->nworkThreads; i++) {
      if (!job_lockfree.my_job_status[ptcomm->granks[i]]) break;
      threads_initialized++;
    }
  }
  PetscFunctionReturn(0);
}
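
/*
   Tear down the pool: drain any outstanding job with a barrier, then ask
   each spawned thread to leave its spin loop by writing THREAD_TERMINATE
   into its status slot, join it, and free the slot arrays.
*/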

PetscErrorCode PetscPThreadCommFinalize_LockFree(PetscThreadComm tcomm)
{
  PetscErrorCode          ierr;
  void                    *jstatus;
  PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;
  PetscInt                i;

  PetscFunctionBegin;
  ierr = PetscThreadCommBarrier_PThread_LockFree(tcomm);CHKERRQ(ierr);
  for (i=ptcomm->thread_num_start; i < tcomm->nworkThreads; i++) {
    job_lockfree.my_job_status[i] = THREAD_TERMINATE;
    pthread_join(ptcomm->tid[i],&jstatus);
  }
  ierr = PetscFree(job_lockfree.my_job_status);CHKERRQ(ierr);
  ierr = PetscFree(job_lockfree.data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
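
/*
   Post a kernel from the main thread.  If the main thread is itself a
   worker (ismainworker), it runs its rank-0 share of the kernel in place
   using the same publish/complete protocol as the spawned workers; the
   optional barrier then waits for the rest of the pool.  The job itself is
   assumed to have been queued, and kernel_ctr bumped, by the generic
   threadcomm layer before this backend routine is called (that queueing
   happens outside this file).
*/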

PetscErrorCode PetscThreadCommRunKernel_PThread_LockFree(PetscThreadComm tcomm,PetscThreadCommJobCtx job)
{
  PetscErrorCode          ierr;
  PetscThreadComm_PThread ptcomm;

  PetscFunctionBegin;
  ptcomm = (PetscThreadComm_PThread)tcomm->data;
  if (ptcomm->ismainworker) {
    job->job_status[0]   = THREAD_JOB_RECIEVED;
    job_lockfree.data[0] = job;
    PetscRunKernel(0,job->nargs,job_lockfree.data[0]);
    job->job_status[0]   = THREAD_JOB_COMPLETED;
  }
  if (ptcomm->synchronizeafter) {
    ierr = (*tcomm->ops->barrier)(tcomm);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
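
/*
   A sketch of how these routines plug into the threadcomm operations table
   (the runkernel/barrier slot names are assumptions based on the
   tcomm->ops->barrier call above, not verified against the dispatch code):

     tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
     tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
*/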