Commit 14559f53 authored by Takhir Fakhrutdinov's avatar Takhir Fakhrutdinov

Добавили папки gravity,memcached

parent f9673ee9
......@@ -2,10 +2,13 @@
!.gitignore
!makefile
!readme.md
!configure.sh
!Makefile
!include/
!fson/
!sofa_c/
!sofa/
!rdjson/
!memcached/
!gravity/
!fte/
#!/bin/sh
CDIR=`pwd`
for i in `find zve/* -type dir -maxdepth 0` ; do
cd $CDIR/$i
../buildinfo
done
!*.c
!*.cpp
!*.h
!*.cl
!*.f
!PZ90_2
!PZ90_02_2.rtf
!EGM96
!EGM2008
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# /System/Library/Frameworks/OpenCL.framework/Libraries/openclc -x cl -cl-std=CL1.1 -cl-auto-vectorize-enable -emit-gcl gravity.cl
PLATFORM=$(shell uname -s |awk -F- '{print $$1}')
BIN = ../build/$(PLATFORM)/bin
LIBS = ../build/$(PLATFORM)/lib
BUILD = ../build/$(PLATFORM)/obj/gravity
SHARE = ../build/$(PLATFORM)/share
INCLUDE = ../build/$(PLATFORM)/include
MODDIR = ../build/$(PLATFORM)/obj/mod
TARGET=gravity
TARGETCL=cl_gravity
TARGLIB=$(LIBS)/libgravity.a
TARGF=test
TARGETICL=openclinfo
F=.f
C=.c
SLV=solvers
OBJ=$(BUILD)/main.o $(BUILD)/wtime.o
OBJ1=$(BUILD)/cl_gravity.o $(BUILD)/wtime.o $(BUILD)/cl_hlp.o
OBJLIB=$(BUILD)/gravity.o
OBJF=$(BUILD)/test.o
LD=ld
GCC=gcc
FC=gfortran
AR=ar
GCCFLAGS += -g -Wall -c -O -I/usr/local/include -I$(INCLUDE)
GCCFLAGS += $(FCEXTRA)
FCFLAGS += -Wall -ffree-line-length-none -c -O -J$(MODDIR) -cpp
FCFLAGS += $(FCEXTRA)
ifeq ($(PLATFORM),$(filter $(PLATFORM),FreeBSD))
LDFLAGS += -lexecinfo
endif
ifeq ($(PLATFORM),$(filter $(PLATFORM),Darwin Linux))
LDFLAGS += -lgfortran -lstdc++ -lgravity -lm -L$(LIBS) -L/opt/local/lib -L/opt/local/lib/gcc48
else
LDFLAGS += -lgfortran -lstdc++ -lgravity -lm -L$(LIBS) -L/usr/local/lib -L/usr/local/lib/gcc48 -rpath /usr/local/lib/gcc48
endif
ifeq ($(PLATFORM),Darwin)
GCC=clang
GCCFLAGS += -framework OpenCL -std=c99
LDFLAGS += -framework OpenCL
else
ifeq ($(PLATFORM),Linux)
LDFLAGS += -lOpenCL
endif
endif
LDFLAGS += $(FCEXTRA)
RM=rm
all: dirs $(TARGLIB)
ifeq ($(PLATFORM),FreeBSD)
test: $(TARGET) $(TARGF)
else
test: $(TARGET) $(TARGETCL) $(TARGF) $(TARGETICL)
endif
dirs:
mkdir -p $(BIN) $(LIBS) $(BUILD) $(SHARE) $(MODDIR) $(INCLUDE)
cp EGM2008 $(SHARE)
cp EGM96 $(SHARE)
cp PZ90_2 $(SHARE)
cp gravity.h $(INCLUDE)
cp ../memcached/memc_gate.h $(INCLUDE)
$(TARGLIB) : $(OBJLIB)
$(AR) rvs $@ $(OBJLIB)
$(TARGET) : $(OBJ) $(OBJLIB)
$(GCC) -O -o $@ $(OBJ) $(LDFLAGS) -lmemc_gate -lmemcached -lsofa_c -lsofa
$(TARGETCL) : $(OBJ1)
$(GCC) -O -o $@ $(OBJ1) $(LDFLAGS)
$(TARGETICL) : $(TARGETICL).o
$(GCC) -O -o $@ $< $(LDFLAGS)
$(TARGF) : $(OBJF)
$(FC) -O -o $@ $(OBJF) $(LDFLAGS) -lmemc_gate -lmemcached -lsofa_c -lsofa -lsolv
$(BUILD)/%.o : %.c
$(GCC) $(GCCFLAGS) -c $< -o $@
$(BUILD)/%.o : %.cpp
$(GCC) $(GCCFLAGS) -c $< -o $@
$(BUILD)/%.o : %.f
$(FC) $(FCFLAGS) -c $< -o $@
clean:
rm -rf $(BUILD) $(TARGET) $(TARGETICL) $(TARGETCL) $(TARGLIB) $(TARGF) $(SHARE)/EGM2008 $(SHARE)/EGM96 $(SHARE)/PZ90_2
#all: memc_gate.c
# gcc -Wall -I/usr/local/include -L/usr/local/lib -L/usr/home/viking/develop/solvers/build/FreeBSD/lib -lmemcached -lsofa_c -lm -lexecinfo -lstdc++ -lc -o memc_gate memc_gate.c
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#define VECTOR_SIZE 4
#define VECTORS 180
#define MAX(a,b) ((a>=b)?a:b)
#define MIN(a,b) ((a<=b)?a:b)
#define MAINDEV CL_DEVICE_TYPE_GPU
extern double wtime();
extern char *getKernelSource(char *);
extern int output_device_info(cl_device_id);
extern void error(char*, cl_int);
extern char* err_code (cl_int);
double fRand(double fMin, double fMax)
{
double f = (double)rand() / RAND_MAX;
return fMin + f * (fMax - fMin);
}
int main(void) {
int i;
const char *k_harmo = getKernelSource("gravity.cl");
// Allocate space for vectors A, B and C
double *A = (double*)malloc(sizeof(double)*VECTOR_SIZE*VECTORS);
double *B = (double*)malloc(sizeof(double)*VECTOR_SIZE*VECTORS);
for (i=0; i<VECTORS; i++) {
A[i*VECTOR_SIZE+0] = fRand(-35000,35000);
A[i*VECTOR_SIZE+1] = fRand(-35000,35000);
A[i*VECTOR_SIZE+2] = fRand(-35000,35000);
A[i*VECTOR_SIZE+3] = 0;
}
/*
A[0] = 30544.803906240781;
A[1] = 28708.707913342118;
A[2] = 4777.8592942243504;
*/
// for(i = 0; i < VECTOR_SIZE; i++) B[i] = 0;
// Get platform and device information
cl_platform_id * platforms = NULL;
cl_uint num_platforms;
cl_int clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
if (clStatus != CL_SUCCESS || num_platforms <= 0) error("Failed to find a platforms.",clStatus);
platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id)*num_platforms);
clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
if (clStatus != CL_SUCCESS || num_platforms <= 0) error("Failed to get the platforms.",clStatus);
//Get the devices list and choose the device you want to run on
cl_device_id *device_list = NULL;
cl_uint num_devices;
clStatus = clGetDeviceIDs( platforms[0], MAINDEV, 0, NULL, &num_devices);
device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices);
clStatus = clGetDeviceIDs( platforms[0], MAINDEV, num_devices, device_list, NULL);
// for (i=0; i<num_devices; i++)
// output_device_info(device_list[i]);
cl_context context = clCreateContext( NULL, num_devices, device_list, NULL, NULL, &clStatus);
if (!context) error("Failed to create a compute context.", clStatus);
cl_command_queue command_queue = clCreateCommandQueue( context, device_list[0], 0, &clStatus);
if (!command_queue) error("Failed to create a command queue.", clStatus);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1, (const char **)&k_harmo, 0, &clStatus);
if (clStatus != CL_SUCCESS) error("Error: Failed to create compute program.", clStatus);
// Build the program
// clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL);
clStatus = clBuildProgram(program, 1, device_list, "-DHARMONIX=64", NULL, NULL);
if (clStatus != CL_SUCCESS) {
size_t len;
char buffer[2048];
clGetProgramBuildInfo(program, NULL, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
error("Error: Failed to build program executable.", clStatus);
}
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "harmo", &clStatus);
if (clStatus != CL_SUCCESS) error("Failed to translate kernel.",clStatus);
// Create memory buffers on the device for each vector
cl_mem A_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY, VECTOR_SIZE * VECTORS * sizeof(double), NULL, &clStatus);
if (clStatus != CL_SUCCESS) error("Failed to create clBuffer.",clStatus);
cl_mem B_clmem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, VECTOR_SIZE * VECTORS * sizeof(double), NULL, &clStatus);
if (clStatus != CL_SUCCESS) error("Failed to create clBuffer.",clStatus);
double rtime = wtime();
// Copy the Buffer A to the device
clStatus = clEnqueueWriteBuffer(command_queue, A_clmem, CL_TRUE, 0, VECTOR_SIZE * VECTORS * sizeof(double), A, 0, NULL, NULL);
if (clStatus != CL_SUCCESS) error("clEnqueueWriteBuffer failed.", clStatus);
// Set the arguments of the kernel
clStatus = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&A_clmem);
clStatus = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&B_clmem);
// Execute the OpenCL kernel on the list
size_t global_size = VECTORS; // Process the entire lists
size_t local_size = MAX(VECTORS,2048); // Process items at a time
clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
// Read the cl memory C_clmem on device to the host variable C
clStatus = clEnqueueReadBuffer(command_queue, B_clmem, CL_TRUE, 0, VECTOR_SIZE * VECTORS * sizeof(double), B, 0, NULL, NULL);
// Clean up and wait for all the comands to complete.
clStatus = clFlush(command_queue);
clStatus = clFinish(command_queue);
rtime = wtime() - rtime;
// Display the result to the screen
for(i = 0; i < VECTORS; i++) printf("%+10.3lf %+10.3lf %+10.3lf %+.16e %+.16e %+.16e\n", A[i*VECTOR_SIZE],A[i*VECTOR_SIZE+1],A[i*VECTOR_SIZE+2],B[i*VECTOR_SIZE],B[i*VECTOR_SIZE+1],B[i*VECTOR_SIZE+2]);
printf("%lf\n", rtime);
// Finally release all OpenCL allocated objects and host buffers.
clStatus = clReleaseKernel(kernel);
clStatus = clReleaseProgram(program);
clStatus = clReleaseMemObject(A_clmem);
clStatus = clReleaseMemObject(B_clmem);
clStatus = clReleaseCommandQueue(command_queue);
clStatus = clReleaseContext(context);
free(A);
free(B);
free(platforms);
free(device_list);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define vImage_Utilities_h
#define vImage_CVUtilities_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
//
// define VERBOSE if you want to print info about work groups sizes
#define VERBOSE 1
//------------------------------------------------------------------------------
//
// Name: err_code()
//
// Purpose: Function to output descriptions of errors for an input error code
//
//
// RETURN: echoes the input error code
//
// HISTORY: Written by Tim Mattson, June 2010
// This version automatically produced by genErrCode.py
// script written by Tom Deakin, August 2013
//
//------------------------------------------------------------------------------
char *err_code (cl_int err_in)
{
switch (err_in) {
case CL_SUCCESS :
return (char*)" CL_SUCCESS ";
case CL_DEVICE_NOT_FOUND :
return (char*)" CL_DEVICE_NOT_FOUND ";
case CL_DEVICE_NOT_AVAILABLE :
return (char*)" CL_DEVICE_NOT_AVAILABLE ";
case CL_COMPILER_NOT_AVAILABLE :
return (char*)" CL_COMPILER_NOT_AVAILABLE ";
case CL_MEM_OBJECT_ALLOCATION_FAILURE :
return (char*)" CL_MEM_OBJECT_ALLOCATION_FAILURE ";
case CL_OUT_OF_RESOURCES :
return (char*)" CL_OUT_OF_RESOURCES ";
case CL_OUT_OF_HOST_MEMORY :
return (char*)" CL_OUT_OF_HOST_MEMORY ";
case CL_PROFILING_INFO_NOT_AVAILABLE :
return (char*)" CL_PROFILING_INFO_NOT_AVAILABLE ";
case CL_MEM_COPY_OVERLAP :
return (char*)" CL_MEM_COPY_OVERLAP ";
case CL_IMAGE_FORMAT_MISMATCH :
return (char*)" CL_IMAGE_FORMAT_MISMATCH ";
case CL_IMAGE_FORMAT_NOT_SUPPORTED :
return (char*)" CL_IMAGE_FORMAT_NOT_SUPPORTED ";
case CL_BUILD_PROGRAM_FAILURE :
return (char*)" CL_BUILD_PROGRAM_FAILURE ";
case CL_MAP_FAILURE :
return (char*)" CL_MAP_FAILURE ";
case CL_MISALIGNED_SUB_BUFFER_OFFSET :
return (char*)" CL_MISALIGNED_SUB_BUFFER_OFFSET ";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST :
return (char*)" CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ";
case CL_INVALID_VALUE :
return (char*)" CL_INVALID_VALUE ";
case CL_INVALID_DEVICE_TYPE :
return (char*)" CL_INVALID_DEVICE_TYPE ";
case CL_INVALID_PLATFORM :
return (char*)" CL_INVALID_PLATFORM ";
case CL_INVALID_DEVICE :
return (char*)" CL_INVALID_DEVICE ";
case CL_INVALID_CONTEXT :
return (char*)" CL_INVALID_CONTEXT ";
case CL_INVALID_QUEUE_PROPERTIES :
return (char*)" CL_INVALID_QUEUE_PROPERTIES ";
case CL_INVALID_COMMAND_QUEUE :
return (char*)" CL_INVALID_COMMAND_QUEUE ";
case CL_INVALID_HOST_PTR :
return (char*)" CL_INVALID_HOST_PTR ";
case CL_INVALID_MEM_OBJECT :
return (char*)" CL_INVALID_MEM_OBJECT ";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR :
return (char*)" CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ";
case CL_INVALID_IMAGE_SIZE :
return (char*)" CL_INVALID_IMAGE_SIZE ";
case CL_INVALID_SAMPLER :
return (char*)" CL_INVALID_SAMPLER ";
case CL_INVALID_BINARY :
return (char*)" CL_INVALID_BINARY ";
case CL_INVALID_BUILD_OPTIONS :
return (char*)" CL_INVALID_BUILD_OPTIONS ";
case CL_INVALID_PROGRAM :
return (char*)" CL_INVALID_PROGRAM ";
case CL_INVALID_PROGRAM_EXECUTABLE :
return (char*)" CL_INVALID_PROGRAM_EXECUTABLE ";
case CL_INVALID_KERNEL_NAME :
return (char*)" CL_INVALID_KERNEL_NAME ";
case CL_INVALID_KERNEL_DEFINITION :
return (char*)" CL_INVALID_KERNEL_DEFINITION ";
case CL_INVALID_KERNEL :
return (char*)" CL_INVALID_KERNEL ";
case CL_INVALID_ARG_INDEX :
return (char*)" CL_INVALID_ARG_INDEX ";
case CL_INVALID_ARG_VALUE :
return (char*)" CL_INVALID_ARG_VALUE ";
case CL_INVALID_ARG_SIZE :
return (char*)" CL_INVALID_ARG_SIZE ";
case CL_INVALID_KERNEL_ARGS :
return (char*)" CL_INVALID_KERNEL_ARGS ";
case CL_INVALID_WORK_DIMENSION :
return (char*)" CL_INVALID_WORK_DIMENSION ";
case CL_INVALID_WORK_GROUP_SIZE :
return (char*)" CL_INVALID_WORK_GROUP_SIZE ";
case CL_INVALID_WORK_ITEM_SIZE :
return (char*)" CL_INVALID_WORK_ITEM_SIZE ";
case CL_INVALID_GLOBAL_OFFSET :
return (char*)" CL_INVALID_GLOBAL_OFFSET ";
case CL_INVALID_EVENT_WAIT_LIST :
return (char*)" CL_INVALID_EVENT_WAIT_LIST ";
case CL_INVALID_EVENT :
return (char*)" CL_INVALID_EVENT ";
case CL_INVALID_OPERATION :
return (char*)" CL_INVALID_OPERATION ";
case CL_INVALID_GL_OBJECT :
return (char*)" CL_INVALID_GL_OBJECT ";
case CL_INVALID_BUFFER_SIZE :
return (char*)" CL_INVALID_BUFFER_SIZE ";
case CL_INVALID_MIP_LEVEL :
return (char*)" CL_INVALID_MIP_LEVEL ";
case CL_INVALID_GLOBAL_WORK_SIZE :
return (char*)" CL_INVALID_GLOBAL_WORK_SIZE ";
case CL_INVALID_PROPERTY :
return (char*)" CL_INVALID_PROPERTY ";
default:
return (char*)"UNKNOWN ERROR";
}
}
void error(char *str, cl_int clStatus){
fprintf(stderr,"ERROR: %s\n%s\n",str,err_code(clStatus));
exit(EXIT_FAILURE);
}
int output_device_info(cl_device_id device_id)
{
int err,i; // error code returned from OpenCL calls
cl_device_type device_type; // Parameter defining the type of the compute device
cl_uint comp_units; // the max number of compute units on a device
cl_char vendor_name[1024] = {0}; // string to hold vendor name for compute device
cl_char device_name[1024] = {0}; // string to hold name of compute device
#ifdef VERBOSE
cl_uint max_work_itm_dims;
size_t max_wrkgrp_size;
size_t *max_loc_size;
#endif
err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), &device_name, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to access device name!\n");
return EXIT_FAILURE;
}
printf(" \n Device is %s ",device_name);
err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to access device type information!\n");
return EXIT_FAILURE;
}
if(device_type == CL_DEVICE_TYPE_GPU)
printf(" GPU from ");
else if (device_type == CL_DEVICE_TYPE_CPU)
printf("\n CPU from ");
else
printf("\n non CPU or GPU processor from ");
err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), &vendor_name, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to access device vendor name!\n");
return EXIT_FAILURE;
}
printf(" %s ",vendor_name);
err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &comp_units, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to access device number of compute units !\n");
return EXIT_FAILURE;
}
printf(" with a max of %d compute units \n",comp_units);
#ifdef VERBOSE
//
// Optionally print information about work group sizes
//
err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
&max_work_itm_dims, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)\n%s\n", err_code(err));
return EXIT_FAILURE;
}
max_loc_size = (size_t*)malloc(max_work_itm_dims * sizeof(size_t));
if(max_loc_size == NULL){
printf(" malloc failed\n");
return EXIT_FAILURE;
}
err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, max_work_itm_dims* sizeof(size_t),
max_loc_size, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_SIZES)\n%s\n",err_code(err));
return EXIT_FAILURE;
}
err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&max_wrkgrp_size, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_GROUP_SIZE)\n%s\n",err_code(err));
return EXIT_FAILURE;
}
printf("work group, work item information");
printf("\n max loc dim ");
for(i=0; i< max_work_itm_dims; i++)
printf(" %d ",(int)(*(max_loc_size+i)));
printf("\n");
printf(" Max work group size = %d\n",(int)max_wrkgrp_size);
#endif
return CL_SUCCESS;
}
//------------------------------------------------------------------------------
char * getKernelSource(char *filename)
{
FILE *file = fopen(filename, "r");
if (!file)
{
fprintf(stderr, "Error: Could not open kernel source file\n");
exit(EXIT_FAILURE);
}
fseek(file, 0, SEEK_END);
int len = ftell(file) + 1;
rewind(file);
char *source = (char *)calloc(sizeof(char), len);
if (!source)
{
fprintf(stderr, "Error: Could not allocate memory for source string\n");
exit(EXIT_FAILURE);
}
fread(source, sizeof(char), len, file);
fclose(file);
return source;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include "gravity.h"
#include "memc_gate.h"
extern void memc_legendre(int N, double sin_fi, double cos_fi , double *P);
char pf_first_run = TRUE;
PlanetField mpf;
int initplanetfield(int PF, PlanetField *f) {
FILE* fd;
char rbuf[2048];
char fnbuf[1024];
int i,n,m;
double C,S,Qnm;
char *env;
f->count = 0;
memset( fnbuf, 0, 1024 );
env = getenv("GRAVITY");
switch(PF){
case PF_EGM2008 : f->fname = "EGM2008" ; f->GM = EGM2008_GM ; f->A = EGM2008_A ; f->f = EGM2008_F ; f->planet = jplEarth ; f->mem = MIN(361,MAXHARMONIX+1) ; break ;
case PF_EGM96 : f->fname = "EGM96" ; f->GM = EGM96_GM ; f->A = EGM96_A ; f->f = EGM96_F ; f->planet = jplEarth ; f->mem = MIN(361,MAXHARMONIX+1) ; break ;
case PF_PZ90_2 : f->fname = "PZ90_2" ; f->GM = PZ90_2_GM ; f->A = PZ90_2_A ; f->f = PZ90_2_F ; f->planet = jplEarth ; f->mem = MIN(361,MAXHARMONIX+1) ; break ;
default: return FALSE;
}
f->GM_div_A = f->GM / f->A ;
if ( env != NULL ) {
memcpy(fnbuf,env,strlen(env));
memcpy(fnbuf+strlen(env),f->fname,strlen(f->fname)) ;
} else
memcpy(fnbuf,f->fname,strlen(f->fname));
i = f->mem * f->mem * sizeof(double);
if( ( f->C = (double*)malloc(i) ) == NULL || ( f->S = (double*)malloc(i) ) == NULL ) return FALSE;
memset( f->C, 0, i );
memset( f->S, 0, i );
if ( ( fd = fopen(fnbuf,"rt") ) != NULL ) {
while ( fgets(rbuf,sizeof(rbuf),fd) != NULL ) {
if( sscanf( rbuf,"%d %d %lf %lf",&n,&m,&C,&S) != 4 ) break;
if( n > MAXHARMONIX ) break;
i = n * f->mem + m ;
f->C[i] = C ;
f->S[i] = S ;
f->count = n ;
}
f->nC20 = f->C[ f->mem << 1 ];
for( n = 2; n <= f->count ; n++ ) {
Qnm = sqrt( 1. / ( 1. + 2. * (double)n ) );
for( m = 0; m <= n; m++ ) {
if( m == 1 )
Qnm *= sqrt( (double)( n * (1+n) / 2 ) );
else if( m >= 2 )
Qnm *= sqrt( (double)( (n+m) * (n-m+1) ) );
i = n * f->mem + m;
f->C[i] /= Qnm;
f->S[i] /= Qnm;
}
}
} else {
f->nC20=0.;
return FALSE;
}
/*
// --------------------------
// OpenCL static array output
// --------------------------
fprintf(stderr,"__constant double C[%d]={",(f->count+1)*(f->count+1));
for ( n=0; n < (f->count+1)*(f->count+1); n++)
if ( f->C[n] > DBL_EPSILON )
fprintf(stderr,"%+.100lg,",f->C[n]);
else
if ( f->C[n] < -DBL_EPSILON )
fprintf(stderr,"%+.100lg,",f->C[n]);
else
fprintf(stderr,"0,");
fprintf(stderr,"};\n%d\n",n);
fprintf(stderr,"__constant double S[%d]={",(f->count+1)*(f->count+1));
for ( n=0; n < (f->count+1)*(f->count+1); n++)
if ( f->S[n] > DBL_EPSILON )
fprintf(stderr,"%+.100lg,",f->S[n]);
else
if ( f->S[n] < -DBL_EPSILON )
fprintf(stderr,"%+.100lg,",f->S[n]);
else
fprintf(stderr,"0,");
fprintf(stderr,"};\n%d\n",n);
// --------------------------
*/
return TRUE;
};
void planetfieldacceleration(int N, PlanetField *f, double *x, double *grad){
int m,n,i;
double r,q2,u1,gamma,q1,ro,rrr,cos_l,sin_l,sin_fi,cos_fi,dUdL,dUdR,dUdX,rm,rn,Pmm,cos_ml,sin_ml,Pn1m,Pnm;
double cos_m1l,sin_m1l;
// double u;
if( N < 0 || N > f->count ) N = f->count;
gamma = f->GM_div_A;
q1 = x[0] * x[0] + x[1] * x[1];
r = sqrt( q1 + x[2] * x[2] );
rrr = sqrt( q1 );
ro = f->A / r;
if ( rrr < DBL_EPSILON ){
cos_l = 1.;
sin_l = 0.;
} else {
cos_l = x[0] / rrr;
sin_l = x[1] / rrr;
}
sin_fi = x[2] / r;
cos_fi = rrr / r;
dUdL = 0.; dUdR = 0.; dUdX = 0.; rm = 1.; Pmm = 1.;
// u = 0.;
cos_m1l = cos_ml = 1.;
sin_m1l = sin_ml = 0.;
Pnm = Pn1m = 0;
for( m = 0; m <= N; m++ ){
rn = rm;
u1=0.;
for( n = m; n <= N; n++ ){
i = n * f->mem + m;
rn *= ro;
if( n == m ){
Pn1m = 0.;
Pnm = Pmm;
} else {
q1 = Pnm;
Pnm = ( (double)( 2 * n-1 ) * sin_fi * Pnm -(double)( n + m - 1 ) * Pn1m ) / (double)( n - m );
Pn1m = q1;
}
q2 = Pnm * rn;
u1 += ( f->S[i] * cos_ml - f->C[i] * sin_ml ) * q2;
q1 = ( f->S[i] * sin_ml + f->C[i] * cos_ml ) * q2;
// if( n != 0 ) u += q1;
dUdR += q1 * (double)( n + 1 );
dUdX += -q1 * sin_fi * (double) m;
if( m != 0 ) {
dUdX += cos_fi * q2 * ( f->C[i-1] * cos_m1l + f->S[i-1] * sin_m1l );
}
// printf("%.15e ",Pnm);
}
dUdL += u1 * (double) m;
cos_m1l = cos_ml;
sin_m1l = sin_ml;
cos_ml = cos_m1l * cos_l - sin_m1l * sin_l;
sin_ml = cos_m1l * sin_l + sin_m1l * cos_l;
Pmm *= cos_fi * (double)( 2 * m + 1 );
rm *= ro;
// printf("\n");
}
if( fabs( rrr ) > DBL_EPSILON ){
dUdL *= gamma / rrr;
dUdX *= gamma / ( cos_fi * cos_fi );
}
dUdR = -gamma / f->A * dUdR * ro;
q1 = sin_fi / r / r;
grad[0] = dUdR * x[0] / r - dUdX * q1 * x[0] - dUdL * sin_l;
grad[1] = dUdR * x[1] / r - dUdX * q1 * x[1] + dUdL * cos_l;
grad[2] = dUdR * sin_fi + dUdX *cos_fi * cos_fi / r;
// *U = u * f->GM / r / ro;
}
#define POLYSIZE sizeof(double[N+1][N+1])
void legendre(int N, double sin_fi, double cos_fi , double *P){
int n, m;
double Pnm, Pmm, Pn1m, q1;
memset( (void *)P, 0, POLYSIZE );
Pmm = 1.;
Pnm = 0.;
Pn1m = 0.;
for( m = 0; m <= N; m++ ){
for( n = m; n <= N; n++ ){
if( n == m ){
Pn1m = 0.;
Pnm = Pmm;
} else {
q1 = Pnm;
Pnm = ( (double)( ( n << 1 ) - 1 ) * sin_fi * Pnm - (double)( n + m - 1 ) * Pn1m ) / (double)( n - m );
Pn1m = q1;
}
P[n*N+m] = Pnm;
}
Pmm *= cos_fi * (double)( ( m << 1 ) + 1 );
}
}
void pfa(int N, PlanetField *f, double *x, double *grad){
int m,n,i;
double r,q2,u1,gamma,q1,ro,rrr,cos_l,sin_l,sin_fi,cos_fi,dUdL,dUdR,dUdX,rm,rn,cos_ml,sin_ml;
double cos_m1l,sin_m1l;
// double u;
double *P;
if( N < 0 || N > f->count ) N = f->count;
P = (double *)malloc( POLYSIZE );
gamma = f->GM_div_A;
q1 = x[0] * x[0] + x[1] * x[1];
r = sqrt( q1 + x[2] * x[2] );
rrr = sqrt( q1 );
ro = f->A / r;
if ( rrr < DBL_EPSILON ){
cos_l = 1.;
sin_l = 0.;
} else {
cos_l = x[0] / rrr;
sin_l = x[1] / rrr;
}
sin_fi = x[2] / r;
cos_fi = rrr / r;
legendre( N, sin_fi, cos_fi, P );
// u = 0.;
dUdL = 0.; dUdR = 0.; dUdX = 0.; rm = 1.; cos_ml = 1.; sin_ml = 0.;
cos_m1l = cos_ml;
sin_m1l = sin_ml;
for( m = 0; m <= N; m++ ){
rn = rm;
u1=0.;
for( n = m; n <= N; n++ ){
i = n * f->mem + m;
rn *= ro;
q2 = P[n*N+m] * rn;
u1 += ( f->S[i] * cos_ml - f->C[i] * sin_ml ) * q2;
q1 = ( f->S[i] * sin_ml + f->C[i] * cos_ml ) * q2;
// if( n != 0 ) u += q1;
dUdR += q1 * (double)( n + 1 );
dUdX += -q1 * sin_fi * (double) m;
if( m != 0 ) {
dUdX += cos_fi * q2 * ( f->C[i-1] * cos_m1l + f->S[i-1] * sin_m1l );
}
}
dUdL += u1 * (double) m;
cos_m1l = cos_ml;
sin_m1l = sin_ml;
cos_ml = cos_m1l * cos_l - sin_m1l * sin_l;
sin_ml = cos_m1l * sin_l + sin_m1l * cos_l;
rm *= ro;
}
if( fabs( rrr ) > DBL_EPSILON ){
dUdL *= gamma / rrr;
dUdX *= gamma / ( cos_fi * cos_fi );
}
dUdR = -gamma / f->A * dUdR * ro;
q1 = sin_fi / r / r;
grad[0] = dUdR * x[0] / r - dUdX * q1 * x[0] - dUdL * sin_l;
grad[1] = dUdR * x[1] / r - dUdX * q1 * x[1] + dUdL * cos_l;
grad[2] = dUdR * sin_fi + dUdX *cos_fi * cos_fi / r;
// *U = u * f->GM / r / ro;
free(P);
}
#define POLYSIZE8 sizeof(double)*81
void legendre8(double sin_fi, double cos_fi , double *P){
int n, m;
double Pnm, Pmm, Pn1m, q1;
memset( (void *)P, 0, POLYSIZE8 );
Pmm = 1.;
Pnm = 0.;
Pn1m = 0.;
for( m = 0; m <= 8; m++ ){
for( n = m; n <= 8; n++ ){
if( n == m ){
Pn1m = 0.;
Pnm = Pmm;
} else {
q1 = Pnm;
Pnm = ( (double)( ( n << 1 ) -1 ) * sin_fi * Pnm - (double)( n + m - 1 ) * Pn1m ) / (double)( n - m );
Pn1m = q1;
}
P[ ( n << 3 ) + m ] = Pnm;
}
Pmm *= cos_fi * (double)( ( m << 1 ) + 1 );
}
}
void pfa8(PlanetField *f, double *x, double *grad){
int m,n,i;
double r,q2,u1,gamma,q1,ro,rrr,cos_l,sin_l,sin_fi,cos_fi,dUdL,dUdR,dUdX;
double *P;
double rm;
double rn[9][9];
double cos_ml[9];
double sin_ml[9];
P = (double *)malloc( POLYSIZE8 );
gamma = f->GM_div_A;
q1 = x[0] * x[0] + x[1] * x[1];
r = sqrt( q1 + x[2] * x[2] );
rrr = sqrt( q1 );
ro = f->A / r;
if ( rrr < DBL_EPSILON ){
cos_l = 1.;
sin_l = 0.;
} else {
cos_l = x[0] / rrr;
sin_l = x[1] / rrr;
}
sin_fi = x[2] / r;
cos_fi = rrr / r;
legendre8(sin_fi, cos_fi, P );
rm = 1.;
cos_ml[0] = 1.;
sin_ml[0] = 0.;
for( m = 0; m <= 8; m++ ){
rn[m][m]=rm;
if ( m < 8 ) {
cos_ml[m+1] = cos_ml[m] * cos_l - sin_ml[m] * sin_l;
sin_ml[m+1] = cos_ml[m] * sin_l + sin_ml[m] * cos_l;
}
for( n = m; n <= 8; n++ )
if ( n == m )
rn[m][n] *= ro;
else
rn[m][n] = rn[m][n-1] * ro;
rm *= ro;
}
dUdL = 0.; dUdR = 0.; dUdX = 0.;
for( m = 0; m <= 8; m++ ){
u1=0.;
for( n = m; n <= 8; n++ ){
i = n * f->mem + m;
q2 = P[ (n << 3) + m ] * rn[m][n];
u1 += ( f->S[i] * cos_ml[m] - f->C[i] * sin_ml[m] ) * q2;
q1 = ( f->S[i] * sin_ml[m] + f->C[i] * cos_ml[m] ) * q2;
dUdR += q1 * (double)( n + 1 );
dUdX += -q1 * sin_fi * (double) m;
if( m != 0 )
dUdX += cos_fi * q2 * ( f->C[i-1] * cos_ml[m-1] + f->S[i-1] * sin_ml[m-1] );
}
dUdL += u1 * (double) m;
}
if( fabs( rrr ) > DBL_EPSILON ){
dUdL *= gamma / rrr;
dUdX *= gamma / ( cos_fi * cos_fi );
}
dUdR = -gamma / f->A * dUdR * ro;
q1 = sin_fi / r / r;
grad[0] = dUdR * x[0] / r - dUdX * q1 * x[0] - dUdL * sin_l;
grad[1] = dUdR * x[1] / r - dUdX * q1 * x[1] + dUdL * cos_l;
grad[2] = dUdR * sin_fi + dUdX * cos_fi * cos_fi / r;
free(P);
}
void c_harmo(int N, double *x, double *f){
if ( pf_first_run ) {
initplanetfield(MAIN_PF_MODEL,&mpf);
pf_first_run = ! pf_first_run;
}
planetfieldacceleration(N,&mpf,x,f);
}
void c_harmo_(int N, double *x, double *f){
if ( pf_first_run ) {
initplanetfield(MAIN_PF_MODEL,&mpf);
pf_first_run = ! pf_first_run;
}
pfa(N,&mpf,x,f);
}
void c_harmo8(double *x, double *f){
if ( pf_first_run ) {
initplanetfield(MAIN_PF_MODEL,&mpf);
pf_first_run = ! pf_first_run;
}
pfa8(&mpf,x,f);
}
This source diff could not be displayed because it is too large. You can view the blob instead.
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
//#define EGM2008_GM 3986004.418e+08
//#define EGM96_GM 3986004.415e+08
//#define PZ90_2_GM 3986004.418e+08
#define EGM2008_GM 398600.4418
#define EGM96_GM 398600.4418
#define PZ90_2_GM 398600.4418
#define EGM2008_A 6378.1370
#define EGM96_A 6378.1370
#define PZ90_2_A 6378.1360
#define EGM2008_F (1./298.257223563)
#define EGM96_F (1./298.257223563)
#define PZ90_2_F (1./298.25784)
#define TRUE 1
#define FALSE !TRUE
#define MAXHARMONIX 8
enum {
jplMercury,
jplVenus,
jplEarth,
jplMars,
jplJupiter,
jplSaturn,
jplUranus,
jplNeptune,
jplPluto,
jplMoon,
jplSun,
jplSS_Bary,
jplEM_Bary,
jplNutations,
jplLibrations,
jplEmbary
};
enum {
PF_EGM2008,
PF_EGM96,
PF_PZ90_2
};
#define MAIN_PF_MODEL PF_PZ90_2
typedef struct {
int planet;
int count;
double GM; // геоцентрическая гравитационная постоянная, [м^3/с^2]
double A; // экваториальный радиус, м
double GM_div_A; // ;)
double f; // сжатие эллипсоида
double *C;
double *S;
double nC20;
int mem;
char *fname;
}PlanetField;
int initplanetfield(int PF,PlanetField *field);
extern void planetfieldacc(int N,PlanetField *f,double *x,double *grad,double *U);
extern void c_harmo(int N, double *x, double *grad);
extern void c_harmo_(int N, double *x, double *grad);
extern void c_harmo8(double *x, double *grad);
extern void legendre(int N, double sin_fi, double cos_fi, double *P);
extern void legendre8(double sin_fi, double cos_fi, double *P);
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "gravity.h"
#define VECTOR_SIZE 4
#define VECTORS 180
extern double wtime();
double fRand(double fMin, double fMax)
{
double f = (double)rand() / RAND_MAX;
return fMin + f * (fMax - fMin);
}
int main() {
int i,j;
// PlanetField f;
// double u;
double *A = (double*)malloc(sizeof(double)*VECTOR_SIZE*VECTORS);
double *B = (double*)malloc(sizeof(double)*VECTOR_SIZE*VECTORS);
/*
A[0] = 30544.803906240781;
A[1] = 28708.707913342118;
A[2] = 4777.8592942243504;
*
double a[3];
*/
for (i=0; i<VECTORS; i++) {
A[i*VECTOR_SIZE+0] = fRand(-35000,35000);
A[i*VECTOR_SIZE+1] = fRand(-35000,35000);
A[i*VECTOR_SIZE+2] = fRand(-35000,35000);
A[i*VECTOR_SIZE+3] = 0;
}
double rtime = wtime();
/* for (i=0;i<3;i++)
if ( InitPlanetField(i,&f) ) {
printf ("%s %d %e\n",f.fname,f.count,f.nC20);
PlanetFieldAcc(128,&f,v,a,&u);
printf("%e %e %e\n",a[0],a[1],a[2]);
}
*/
for (i=0; i<VECTORS; i++) {
c_harmo(8,&A[i*VECTOR_SIZE],&B[i*VECTOR_SIZE]);
}
rtime = wtime() - rtime;
for(i = 0; i < VECTORS; i++) printf("%+10.3lf %+10.3lf %+10.3lf %+.16e %+.16e %+.16e\n", A[i*VECTOR_SIZE],A[i*VECTOR_SIZE+1],A[i*VECTOR_SIZE+2],B[i*VECTOR_SIZE],B[i*VECTOR_SIZE+1],B[i*VECTOR_SIZE+2]);
printf("%lf\n", rtime);
// }
// double *P = (double *)malloc( sizeof(double[81]) );
/* for ( i=0;i<10000000;i++)
c_harmo_(8,v,a);
printf("%.15e %.15e %.15e\n",a[0],a[1],a[2]);
legendre(8,0.2,sqrt(1-0.2*0.2),P);
for (i=0; i<8; i++) {
for (j=0; j<8; j++)
printf("%+.2e ",P[i*8+j]);
printf("\n");
}
printf("\n");
for ( i=0;i<10000000;i++)
c_harmo8(v,a);
printf("%.15e %.15e %.15e\n",a[0],a[1],a[2]);
legendre8(0.2,sqrt(1-0.2*0.2),P);
for (i=0; i<8; i++) {
for (j=0; j<8; j++)
printf("%+.2e ",P[i*8+j]);
printf("\n");
}
*/
// free (P);
/* printf("\n");
legendre8(0.2,sqrt(1-0.2*0.2),P);
for (i=0; i<8; i++) {
for (j=0; j<8; j++)
printf("%+.5e",P[i*8+j]);
printf("\n");
}
*/
/* v[0]=30554.803906240781;
v[1]=28718.707913342118;
v[2]=4787.8592942243504;
c_harmo(8,v,a);
printf("%e %e %e\n",a[0],a[1],a[2]);
*/
return 0;
}
#include <iostream>
#include <fstream>
#include <sstream>
#if defined(_WIN32)
#include <malloc.h> // needed for alloca
#endif // _WIN32
#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
# include <alloca.h>
#endif // linux
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
///
// Display information for a particular platform.
// Assumes that all calls to clGetPlatformInfo returns
// a value of type char[], which is valid for OpenCL 1.1.
//
void DisplayPlatformInfo(
cl_platform_id id,
cl_platform_info name,
std::string str)
{
cl_int errNum;
std::size_t paramValueSize;
errNum = clGetPlatformInfo(
id,
name,
0,
NULL,
&paramValueSize);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL platform " << str << "." << std::endl;
return;
}
char * info = (char *)alloca(sizeof(char) * paramValueSize);
errNum = clGetPlatformInfo(
id,
name,
paramValueSize,
info,
NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL platform " << str << "." << std::endl;
return;
}
std::cout << "\t" << str << ":\t\t" << info << std::endl;
}
template<typename T>
void appendBitfield(T info, T value, std::string name, std::string & str)
{
if (info & value)
{
if (str.length() > 0)
{
str.append(" | ");
}
str.append(name);
}
}
///
// Display information for a particular device.
// As different calls to clGetDeviceInfo may return
// values of different types a template is used.
// As some values returned are arrays of values, a templated class is
// used so it can be specialized for this case, see below.
//
template <typename T>
class InfoDevice
{
public:
static void display(
cl_device_id id,
cl_device_info name,
std::string str)
{
cl_int errNum;
std::size_t paramValueSize;
errNum = clGetDeviceInfo(
id,
name,
0,
NULL,
&paramValueSize);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl;
return;
}
T * info = (T *)alloca(sizeof(T) * paramValueSize);
errNum = clGetDeviceInfo(
id,
name,
paramValueSize,
info,
NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL device info " << str << "." << std::endl;
return;
}
// Handle a few special cases
switch (name)
{
case CL_DEVICE_VENDOR_ID:
case CL_DEVICE_ADDRESS_BITS:
case CL_DEVICE_PLATFORM:
case CL_DEVICE_AVAILABLE:
case CL_DEVICE_MAX_SAMPLERS:
{
std::cout << "\t\t" << str << ":\t\t\t\t" << *info << std::endl;
}
break;
case CL_DEVICE_MAX_WORK_GROUP_SIZE:
case CL_DEVICE_MAX_COMPUTE_UNITS:
case CL_DEVICE_MAX_CLOCK_FREQUENCY:
case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
case CL_DEVICE_IMAGE2D_MAX_WIDTH:
case CL_DEVICE_IMAGE3D_MAX_WIDTH:
case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
case CL_DEVICE_IMAGE3D_MAX_DEPTH:
case CL_DEVICE_COMPILER_AVAILABLE:
case CL_DEVICE_ENDIAN_LITTLE:
case CL_DEVICE_HOST_UNIFIED_MEMORY:
case CL_DEVICE_MAX_CONSTANT_ARGS:
case CL_DEVICE_GLOBAL_MEM_SIZE:
case CL_DEVICE_LOCAL_MEM_SIZE:
case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
case CL_DEVICE_MAX_PARAMETER_SIZE:
case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
case CL_DEVICE_MAX_READ_IMAGE_ARGS:
case CL_DEVICE_IMAGE_SUPPORT:
{
std::cout << "\t\t" << str << ":\t\t\t" << *info << std::endl;
}
break;
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
{
std::cout << "\t\t" << str << ":\t" << *info << std::endl;
}
break;
case CL_DEVICE_TYPE:
{
std::string deviceType="";
appendBitfield<cl_device_type>(
*(reinterpret_cast<cl_device_type*>(info)),
CL_DEVICE_TYPE_CPU,
"CL_DEVICE_TYPE_CPU",
deviceType);
appendBitfield<cl_device_type>(
*(reinterpret_cast<cl_device_type*>(info)),
CL_DEVICE_TYPE_GPU,
"CL_DEVICE_TYPE_GPU",
deviceType);
appendBitfield<cl_device_type>(
*(reinterpret_cast<cl_device_type*>(info)),
CL_DEVICE_TYPE_ACCELERATOR,
"CL_DEVICE_TYPE_ACCELERATOR",
deviceType);
appendBitfield<cl_device_type>(
*(reinterpret_cast<cl_device_type*>(info)),
CL_DEVICE_TYPE_DEFAULT,
"CL_DEVICE_TYPE_DEFAULT",
deviceType);
std::cout << "\t\t" << str << ":\t\t\t\t\t" << deviceType << std::endl;
}
break;
case CL_DEVICE_SINGLE_FP_CONFIG:
{
std::string fpType="";
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_DENORM,
"CL_FP_DENORM",
fpType);
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_INF_NAN,
"CL_FP_INF_NAN",
fpType);
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_ROUND_TO_NEAREST,
"CL_FP_ROUND_TO_NEAREST",
fpType);
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_ROUND_TO_ZERO,
"CL_FP_ROUND_TO_ZERO",
fpType);
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_ROUND_TO_INF,
"CL_FP_ROUND_TO_INF",
fpType);
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_FMA,
"CL_FP_FMA",
fpType);
#ifdef CL_FP_SOFT_FLOAT
appendBitfield<cl_device_fp_config>(
*(reinterpret_cast<cl_device_fp_config*>(info)),
CL_FP_SOFT_FLOAT,
"CL_FP_SOFT_FLOAT",
fpType);
#endif
std::cout << "\t\t" << str << ":\t\t\t" << fpType << std::endl;
}
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
{
std::string memType;
appendBitfield<cl_device_mem_cache_type>(
*(reinterpret_cast<cl_device_mem_cache_type*>(info)),
CL_NONE,
"\tCL_NONE",
memType);
appendBitfield<cl_device_mem_cache_type>(
*(reinterpret_cast<cl_device_mem_cache_type*>(info)),
CL_READ_ONLY_CACHE,
"\tCL_READ_ONLY_CACHE",
memType);
appendBitfield<cl_device_mem_cache_type>(
*(reinterpret_cast<cl_device_mem_cache_type*>(info)),
CL_READ_WRITE_CACHE,
"\tCL_READ_WRITE_CACHE",
memType);
std::cout << "\t\t" << str << ":\t\t" << memType << std::endl;
}
break;
case CL_DEVICE_LOCAL_MEM_TYPE:
{
std::string lmemType="";
appendBitfield<cl_device_local_mem_type>(
*(reinterpret_cast<cl_device_local_mem_type*>(info)),
CL_GLOBAL,
"CL_LOCAL",
lmemType);
appendBitfield<cl_device_local_mem_type>(
*(reinterpret_cast<cl_device_local_mem_type*>(info)),
CL_GLOBAL,
"CL_GLOBAL",
lmemType);
std::cout << "\t\t" << str << ":\t\t\t" << lmemType << std::endl;
}
break;
case CL_DEVICE_EXECUTION_CAPABILITIES:
{
std::string memType="";
appendBitfield<cl_device_exec_capabilities>(
*(reinterpret_cast<cl_device_exec_capabilities*>(info)),
CL_EXEC_KERNEL,
"CL_EXEC_KERNEL",
memType);
appendBitfield<cl_device_exec_capabilities>(
*(reinterpret_cast<cl_device_exec_capabilities*>(info)),
CL_EXEC_NATIVE_KERNEL,
"CL_EXEC_NATIVE_KERNEL",
memType);
std::cout << "\t\t" << str << ":\t\t" << memType << std::endl;
}
break;
case CL_DEVICE_QUEUE_PROPERTIES:
{
std::string memType="";
appendBitfield<cl_device_exec_capabilities>(
*(reinterpret_cast<cl_device_exec_capabilities*>(info)),
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
"CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
memType);
appendBitfield<cl_device_exec_capabilities>(
*(reinterpret_cast<cl_device_exec_capabilities*>(info)),
CL_QUEUE_PROFILING_ENABLE,
"CL_QUEUE_PROFILING_ENABLE",
memType);
std::cout << "\t\t" << str << ":\t\t\t" << memType << std::endl;
}
break;
default:
std::cout << "\t\t" << str << ":\t\t" << *info << std::endl;
break;
}
}
};
///
// Simple trait class used to wrap base types.
//
template <typename T>
class ArrayType
{
public:
static bool isChar() { return false; }
};
///
// Specialized for the char (i.e. null terminated string case).
//
template<>
class ArrayType<char>
{
public:
static bool isChar() { return true; }
};
///
// Specialized instance of class InfoDevice for array types.
//
template <typename T>
class InfoDevice<ArrayType<T> >
{
public:
static void display(
cl_device_id id,
cl_device_info name,
std::string str)
{
cl_int errNum;
std::size_t paramValueSize;
errNum = clGetDeviceInfo(
id,
name,
0,
NULL,
&paramValueSize);
if (errNum != CL_SUCCESS)
{
std::cerr
<< "Failed to find OpenCL device info "
<< str
<< "."
<< std::endl;
return;
}
T * info = (T *)alloca(sizeof(T) * paramValueSize);
errNum = clGetDeviceInfo(
id,
name,
paramValueSize,
info,
NULL);
if (errNum != CL_SUCCESS)
{
std::cerr
<< "Failed to find OpenCL device info "
<< str
<< "."
<< std::endl;
return;
}
if (ArrayType<T>::isChar())
{
std::cout << "\t " << str << ":\t\t" << info << std::endl;
}
else if (name == CL_DEVICE_MAX_WORK_ITEM_SIZES)
{
cl_uint maxWorkItemDimensions;
errNum = clGetDeviceInfo(
id,
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(cl_uint),
&maxWorkItemDimensions,
NULL);
if (errNum != CL_SUCCESS)
{
std::cerr
<< "Failed to find OpenCL device info "
<< "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."
<< std::endl;
return;
}
std::cout << "\t\t" << str << ":\t\t\t" ;
for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
{
std::cout << info[i] << " ";
}
std::cout << std::endl;
}
}
};
///
// Enumerate platforms and display information about them
// and their associated devices.
//
void displayInfo(void)
{
cl_int errNum;
cl_uint numPlatforms;
cl_platform_id * platformIds;
cl_context context = NULL;
// First, query the total number of platforms
errNum = clGetPlatformIDs(0, NULL, &numPlatforms);
if (errNum != CL_SUCCESS || numPlatforms <= 0)
{
std::cerr << "Failed to find any OpenCL platform." << std::endl;
return;
}
// Next, allocate memory for the installed plaforms, and qeury
// to get the list.
platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * numPlatforms);
// First, query the total number of platforms
errNum = clGetPlatformIDs(numPlatforms, platformIds, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find any OpenCL platforms." << std::endl;
return;
}
std::cout << "Number of platforms: \t" << numPlatforms << std::endl;
// Iterate through the list of platforms displaying associated information
for (cl_uint i = 0; i < numPlatforms; i++)
{
// First we display information associated with the platform
DisplayPlatformInfo(
platformIds[i],
CL_PLATFORM_PROFILE,
"CL_PLATFORM_PROFILE");
DisplayPlatformInfo(
platformIds[i],
CL_PLATFORM_VERSION,
"CL_PLATFORM_VERSION");
DisplayPlatformInfo(
platformIds[i],
CL_PLATFORM_VENDOR,
"CL_PLATFORM_VENDOR");
DisplayPlatformInfo(
platformIds[i],
CL_PLATFORM_EXTENSIONS,
"CL_PLATFORM_EXTENSIONS");
// Now query the set of devices associated with the platform
cl_uint numDevices;
errNum = clGetDeviceIDs(
platformIds[i],
CL_DEVICE_TYPE_ALL,
0,
NULL,
&numDevices);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL devices." << std::endl;
return;
}
cl_device_id * devices = (cl_device_id *)alloca(sizeof(cl_device_id) * numDevices);
errNum = clGetDeviceIDs(
platformIds[i],
CL_DEVICE_TYPE_ALL,
numDevices,
devices,
NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Failed to find OpenCL devices." << std::endl;
return;
}
std::cout << "\tNumber of devices: \t" << numDevices << std::endl;
// Iterate through each device, displaying associated information
for (cl_uint j = 0; j < numDevices; j++)
{
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_NAME,
"CL_DEVICE_NAME");
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_VENDOR,
"CL_DEVICE_VENDOR");
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_VERSION,
"CL_DEVICE_VERSION");
#ifdef CL_DEVICE_OPENCL_C_VERSION
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_OPENCL_C_VERSION,
"CL_DEVICE_OPENCL_C_VERSION");
#endif
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DRIVER_VERSION,
"CL_DRIVER_VERSION");
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_PROFILE,
"CL_DEVICE_PROFILE");
InfoDevice<ArrayType<char> >::display(
devices[j],
CL_DEVICE_EXTENSIONS,
"CL_DEVICE_EXTENSIONS");
InfoDevice<cl_device_type>::display(
devices[j],
CL_DEVICE_TYPE,
"CL_DEVICE_TYPE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_VENDOR_ID,
"CL_DEVICE_VENDOR_ID");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_COMPUTE_UNITS,
"CL_DEVICE_MAX_COMPUTE_UNITS");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
"CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
InfoDevice<ArrayType<size_t> >::display(
devices[j],
CL_DEVICE_MAX_WORK_ITEM_SIZES,
"CL_DEVICE_MAX_WORK_ITEM_SIZES");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_MAX_WORK_GROUP_SIZE,
"CL_DEVICE_MAX_WORK_GROUP_SIZE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE");
#ifdef CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,
"CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_INT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,
"CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF");
#endif
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_CLOCK_FREQUENCY,
"CL_DEVICE_MAX_CLOCK_FREQUENCY");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_ADDRESS_BITS,
"CL_DEVICE_ADDRESS_BITS");
InfoDevice<cl_ulong>::display(
devices[j],
CL_DEVICE_MAX_MEM_ALLOC_SIZE,
"CL_DEVICE_MAX_MEM_ALLOC_SIZE");
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_IMAGE_SUPPORT,
"CL_DEVICE_IMAGE_SUPPORT");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_READ_IMAGE_ARGS,
"CL_DEVICE_MAX_READ_IMAGE_ARGS");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
"CL_DEVICE_MAX_WRITE_IMAGE_ARGS");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_IMAGE2D_MAX_WIDTH,
"CL_DEVICE_IMAGE2D_MAX_WIDTH");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_IMAGE2D_MAX_HEIGHT,
"CL_DEVICE_IMAGE2D_MAX_HEIGHT");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_IMAGE3D_MAX_WIDTH,
"CL_DEVICE_IMAGE3D_MAX_WIDTH");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_IMAGE3D_MAX_HEIGHT,
"CL_DEVICE_IMAGE3D_MAX_HEIGHT");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_IMAGE3D_MAX_DEPTH,
"CL_DEVICE_IMAGE3D_MAX_DEPTH");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_SAMPLERS,
"CL_DEVICE_MAX_SAMPLERS");
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_MAX_PARAMETER_SIZE,
"CL_DEVICE_MAX_PARAMETER_SIZE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MEM_BASE_ADDR_ALIGN,
"CL_DEVICE_MEM_BASE_ADDR_ALIGN");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
"CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE");
InfoDevice<cl_device_fp_config>::display(
devices[j],
CL_DEVICE_SINGLE_FP_CONFIG,
"CL_DEVICE_SINGLE_FP_CONFIG");
InfoDevice<cl_device_mem_cache_type>::display(
devices[j],
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
"CL_DEVICE_GLOBAL_MEM_CACHE_TYPE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
"CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE");
InfoDevice<cl_ulong>::display(
devices[j],
CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
"CL_DEVICE_GLOBAL_MEM_CACHE_SIZE");
InfoDevice<cl_ulong>::display(
devices[j],
CL_DEVICE_GLOBAL_MEM_SIZE,
"CL_DEVICE_GLOBAL_MEM_SIZE");
InfoDevice<cl_ulong>::display(
devices[j],
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
"CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE");
InfoDevice<cl_uint>::display(
devices[j],
CL_DEVICE_MAX_CONSTANT_ARGS,
"CL_DEVICE_MAX_CONSTANT_ARGS");
InfoDevice<cl_device_local_mem_type>::display(
devices[j],
CL_DEVICE_LOCAL_MEM_TYPE,
"CL_DEVICE_LOCAL_MEM_TYPE");
InfoDevice<cl_ulong>::display(
devices[j],
CL_DEVICE_LOCAL_MEM_SIZE,
"CL_DEVICE_LOCAL_MEM_SIZE");
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_ERROR_CORRECTION_SUPPORT,
"CL_DEVICE_ERROR_CORRECTION_SUPPORT");
#ifdef CL_DEVICE_HOST_UNIFIED_MEMORY
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_HOST_UNIFIED_MEMORY,
"CL_DEVICE_HOST_UNIFIED_MEMORY");
#endif
InfoDevice<std::size_t>::display(
devices[j],
CL_DEVICE_PROFILING_TIMER_RESOLUTION,
"CL_DEVICE_PROFILING_TIMER_RESOLUTION");
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_ENDIAN_LITTLE,
"CL_DEVICE_ENDIAN_LITTLE");
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_AVAILABLE,
"CL_DEVICE_AVAILABLE");
InfoDevice<cl_bool>::display(
devices[j],
CL_DEVICE_COMPILER_AVAILABLE,
"CL_DEVICE_COMPILER_AVAILABLE");
InfoDevice<cl_device_exec_capabilities>::display(
devices[j],
CL_DEVICE_EXECUTION_CAPABILITIES,
"CL_DEVICE_EXECUTION_CAPABILITIES");
InfoDevice<cl_command_queue_properties>::display(
devices[j],
CL_DEVICE_QUEUE_PROPERTIES,
"CL_DEVICE_QUEUE_PROPERTIES");
InfoDevice<cl_platform_id>::display(
devices[j],
CL_DEVICE_PLATFORM,
"CL_DEVICE_PLATFORM");
std::cout << std::endl << std::endl;
}
}
}
///
// main() for OpenCLInfo example
//
int main(int argc, char** argv)
{
cl_context context = 0;
displayInfo();
return 0;
}
module test1
implicit none
contains
subroutine HrmEgm (N,x,f)
implicit none
integer N
double precision x(*),f(*)
interface
subroutine c_harmo (N,x,f) bind(c)
use,intrinsic :: iso_c_binding
integer(c_int),value,intent(in) :: N
real(c_double),intent(in) :: x(*)
real(c_double),intent(out) :: f(*)
end subroutine c_harmo
end interface
call c_harmo(N,x,f)
end subroutine
end module test1
program test
use test1
double precision x(3),f(3)
data x(1) / 30544.803906240781D00 /
data x(2) / 28708.707913342118D00 /
data x(3) / 4777.8592942243504D00 /
call HrmEgm(8,x,f)
write (*,*) f(1),f(2),f(3)
call HrmEgm96(8,x,f)
write (*,*) f(1),f(2),f(3)
end program
\ No newline at end of file
#ifdef _OPENMP
#include <omp.h>
#else
#include <sys/time.h>
#endif
#include <stdlib.h>
double wtime()
{
#ifdef _OPENMP
/* Use omp_get_wtime() if we can */
return omp_get_wtime();
#else
/* Use a generic timer */
static int sec = -1;
struct timeval tv;
gettimeofday(&tv, NULL);
if (sec < 0) sec = tv.tv_sec;
return (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec;
#endif
}
memc_gate
!*.c
!*.h
!*.new
!*.f
!.gitingnore
PLATFORM=$(shell uname -s |awk -F- '{print $$1}')
BIN = ../build/$(PLATFORM)/bin
LIBS = ../build/$(PLATFORM)/lib
BUILD = ../build/$(PLATFORM)/obj/memc_gate
SHARE = ../build/$(PLATFORM)/share
INCLUDE = ../build/$(PLATFORM)/include
MODDIR = ../build/$(PLATFORM)/obj/mod
TARGET=memc_gate
TARGLIB=$(LIBS)/libmemc_gate.a
TARGF=test
F=.f
C=.c
SLV=solvers
OBJ=$(BUILD)/memc_main.o
OBJLIB= $(BUILD)/memc_gate.o
OBJF=$(BUILD)/test.o
GCC=gcc
FC=gfortran
AR=ar
GCCFLAGS += -g -Wall -c -O -I/opt/local/include -I/usr/local/include -I$(INCLUDE)
GCCFLAGS += $(FCEXTRA)
FCFLAGS += -Wall -ffree-line-length-none -c -O -J$(MODDIR) -cpp
FCFLAGS += $(FCEXTRA)
LDFLAGS = -lgfortran -lmemc_gate -lgravity -lstdc++ -lmemcached -lsofa_c -lm -L$(LIBS) -L/usr/local/lib -L/opt/local/lib -L/opt/local/lib/gcc48 -L/usr/local/lib/gcc48
ifeq ($(PLATFORM),FreeBSD)
LDFLAGS += -lexecinfo -rpath /usr/local/lib/gcc48
endif
LDFLAGS += $(FCEXTRA)
RM=rm
all: dirs $(TARGLIB) $(TARGET) $(TARGF)
dirs:
mkdir -p $(BIN) $(LIBS) $(BUILD)
cp memc_gate.h $(INCLUDE)
$(TARGLIB) : $(OBJLIB)
$(AR) rvs $@ $(OBJLIB)
$(TARGET) : $(OBJ) $(OBJLIB)
$(GCC) -O -o $@ $(OBJ) $(LDFLAGS)
$(TARGF) : $(OBJF)
$(FC) -O -o $@ $(OBJF) $(LDFLAGS)
$(BUILD)/%.o : %.c
$(GCC) $(GCCFLAGS) -c $< -o $@
$(BUILD)/%.o : %.f
$(FC) $(FCFLAGS) -c $< -o $@
clean:
rm -rf $(BUILD) $(TARGET) $(TARGLIB) $(TARGF)
#all: memc_gate.c
# gcc -Wall -I/usr/local/include -L/usr/local/lib -L/usr/home/viking/develop/solvers/build/FreeBSD/lib -lmemcached -lsofa_c -lm -lexecinfo -lstdc++ -lc -o memc_gate memc_gate.c
double precision function iau_ee00a (dt1,dt2)
implicit none
double precision dt1,dt2
interface
real(c_double) function memc_iau_ee00a (dt1,dt2) bind(c)
use,intrinsic :: iso_c_binding
real(c_double),value,intent(in) :: dt1
real(c_double),value,intent(in) :: dt2
end function memc_iau_ee00a
end interface
iau_ee00a = memc_iau_ee00a(dt1,dt2)
end function
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <libmemcached/memcached.h>
#include <sofa.h>
#include "memc_gate.h"
#define SZOC sizeof(char)
#define SZOI sizeof(int)
#define SZOD sizeof(double)
#define TRUE 1
#define FALSE !TRUE
// #define STRKEYS // long string keys
// #define DEBUG // as is
memcached_st *memc;
char first_run = TRUE;
void close_memc_connect() {
memcached_free(memc);
}
extern void legendre(int N, double sin_fi, double cos_fi, double *P);
#ifdef DEBUG
void debug_keys(int keylen, char* key) {
#ifndef STRKEYS
int j;
#endif
fprintf(stderr,"KEYLEN:%d\n",(int)keylen);
#ifdef STRKEYS
fprintf(stderr,"KEY:%s\n",key);
#else
fprintf(stderr,"HEXKEY:");
for (j=0;j<keylen;j++)
fprintf(stderr,"%02x ",(unsigned char)key[j]);
fprintf(stderr,"\n");
#endif
}
#endif
void memc_iau_nut00a(double dt1, double dt2, double *dpsi, double *deps ) {
char fn = '1'; // unique function descriptor
#ifdef STRKEYS
size_t keylen = 0;
char key[MEMCACHED_MAX_KEY] = "";
#else
size_t keylen = SZOC+SZOD*2;
char key[SZOC+SZOD*2] = "";
#endif
uint32_t flags = 0;
size_t length = 0;
char *retval = NULL;
memcached_return_t rc;
char val[SZOD*2]; // 16 bytes for values
#ifdef STRKEYS
sprintf(key,"%c%+-.9f%+-.9f",fn,dt1,dt2);
keylen = strlen(key);
#else
memcpy(key , &fn, SZOC);
memcpy(key + SZOC , &dt1, SZOD);
memcpy(key + SZOC + SZOD, &dt2, SZOD);
#endif
#ifdef DEBUG
debug_keys(keylen,key);
#endif
if (first_run) {
const char *config_string= "--SOCKET=\"/tmp/memcached.sock\" --BINARY-PROTOCOL";
// add --NAMESPACE=... later ??? for simple function results isolation
memc = memcached(config_string, strlen(config_string));
first_run = ! first_run;
}
retval = memcached_get (memc, key, keylen, &length, &flags, &rc);
if (rc != MEMCACHED_SUCCESS) {
#ifdef DEBUG
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
iauNut00a(dt1,dt2,dpsi,deps);
memcpy(val , dpsi, SZOD);
memcpy(val + SZOD, deps, SZOD);
rc = memcached_set(memc, key, keylen, val, SZOD*2, (time_t)0, (uint32_t)0);
#ifdef DEBUG
if (rc != MEMCACHED_SUCCESS)
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
} else {
memcpy(dpsi,retval , SZOD);
memcpy(deps,retval + SZOD, SZOD);
}
free(retval);
}
double memc_iau_ee00a ( double dt1, double dt2 ) {
char fn = '2'; // unique function descriptor
#ifdef STRKEYS
size_t keylen = 0;
char key[MEMCACHED_MAX_KEY] = "";
#else
size_t keylen = SZOC+SZOD*2;
char key[SZOC+SZOD*2] = "";
#endif
uint32_t flags = 0;
size_t length = 0;
char *retval = NULL;
memcached_return_t rc;
double ee00a;
char val[SZOD]; // 8 bytes for values
#ifdef STRKEYS
sprintf(key,"%c%+-.9f%+-.9f",fn,dt1,dt2);
keylen = strlen(key);
#else
memcpy(key , &fn, SZOC);
memcpy(key + SZOC , &dt1, SZOD);
memcpy(key + SZOC + SZOD, &dt2, SZOD);
#endif
#ifdef DEBUG
debug_keys(keylen,key);
#endif
if (first_run) {
const char *config_string= "--SOCKET=\"/tmp/memcached.sock\" --BINARY-PROTOCOL";
memc = memcached(config_string, strlen(config_string));
first_run = ! first_run;
}
retval = memcached_get (memc, key, keylen, &length, &flags, &rc);
if (rc != MEMCACHED_SUCCESS) {
#ifdef DEBUG
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
ee00a = iauEe00a(dt1,dt2);
memcpy(val , &ee00a, SZOD);
rc = memcached_set(memc, key, keylen, val, SZOD, (time_t)0, (uint32_t)0);
#ifdef DEBUG
if (rc != MEMCACHED_SUCCESS)
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
} else {
memcpy(&ee00a,retval , SZOD);
}
free(retval);
return ee00a;
}
void memc_legendre(int N, double sin_fi, double cos_fi , double *P){
char fn = '3'; // unique function descriptor
#ifdef STRKEYS
size_t keylen = 0;
char key[MEMCACHED_MAX_KEY] = "";
#else
size_t keylen = SZOC+SZOI+SZOD;
char key[SZOC+SZOI+SZOD] = "";
#endif
uint32_t flags = 0;
size_t length = 0;
char *retval = NULL;
memcached_return_t rc;
double r_sin_fi, r_cos_fi;
int n, m, i;
char *val;
i = sizeof(double[N+1][N+1]);
val = (char *)malloc( i );
if ( ! ( ( sin_fi < 0.35 ) && ( sin_fi > -0.35 ) ) ) {
legendre( N, sin_fi, cos_fi, P);
} else {
#define PREC 10000000
r_sin_fi = round( ( sin_fi ) * PREC ) / PREC;
r_cos_fi = round( ( cos_fi ) * PREC ) / PREC;
#ifdef STRKEYS
sprintf(key,"%c%d%+-.8f",fn,N,r_sin_fi);
keylen = strlen(key);
#else
memcpy(key , &fn, SZOC);
memcpy(key + SZOC , &N, SZOI);
memcpy(key + SZOC + SZOI, &r_sin_fi, SZOD);
#endif
#ifdef DEBUG
debug_keys(keylen,key);
#endif
if (first_run) {
const char *config_string= "--SOCKET=\"/tmp/memcached.sock\" --BINARY-PROTOCOL";
memc = memcached(config_string, strlen(config_string));
first_run = ! first_run;
}
retval = memcached_get (memc, key, keylen, &length, &flags, &rc);
#ifdef DEBUG
if (rc != MEMCACHED_SUCCESS)
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
if ( rc == 16 ) { // NOT FOUND
legendre( N, r_sin_fi, r_cos_fi, P);
i = 0;
for( m = 0; m <= N; m++ )
for( n = m; n <= N; n++ ) {
memcpy(val+i*SZOD, &P[n*N+m], SZOD);
i++;
}
rc = memcached_set(memc, key, keylen, val, SZOD*i, (time_t)0, (uint32_t)0);
#ifdef DEBUG
if (rc != MEMCACHED_SUCCESS)
fprintf(stderr,"%d: %s\n", rc, memcached_strerror(memc,rc));
#endif
} else {
if ( rc == 0 ) { // FOUND
i=0;
for( m = 0; m <= N; m++ )
for( n = m; n <= N; n++ ) {
memcpy(&P[n*N+m],retval+i*SZOD, SZOD);
i++;
}
} else // NO CONNECTION & OTHER ERRORS
legendre( N, sin_fi, cos_fi, P);
}
free(retval);
}
free(val);
}
void memc_iau_nut00a (double dt1, double dt2, double *dpsi, double *deps);
double memc_iau_ee00a (double dt1, double dt2);
void memc_legendre (int N, double sin_fi, double cos_fi , double *P);
void close_memc_connect ();
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <libmemcached/memcached.h>
#include <sofa.h>
#include "memc_gate.h"
int main( void )
{
int i,n,m;
double dt1 = 2451547.1,dt2 = -1423.1;
double dpsi,deps;
double *P;
int N=8;
P = (double *)malloc( sizeof(double)*(N+1)*(N+1) );
memset( (void *)P, 0, sizeof(double)*(N+1)*(N+1) );
for(i=0;i<1;i++) {
iauNut00a(dt1,dt2,&dpsi,&deps);
fprintf(stderr,"%e\n",dpsi);
fprintf(stderr,"%e\n",deps);
memc_iau_nut00a(dt1,dt2,&dpsi,&deps);
fprintf(stderr,"%e\n",dpsi);
fprintf(stderr,"%e\n",deps);
fprintf(stderr,"%e\n",iauEe00a(dt1,dt2));
fprintf(stderr,"%e\n",memc_iau_ee00a(dt1,dt2));
memc_legendre(N, 0.15,0.98868599666426,P);
for( m = 0; m <= N; m++ ) {
for( n = m; n <= N; n++ ) {
fprintf(stderr,"%.15e ",P[n*N+m]);
}
fprintf(stderr,"\n");
}
}
free(P);
return 0;
}
subroutine iau_nut00a (dt1,dt2,dpsi,deps)
implicit none
double precision dt1,dt2,dpsi,deps
interface
subroutine memc_iau_nut00a (dt1,dt2,dpsi,deps) bind(c)
use,intrinsic :: iso_c_binding
real(c_double),value,intent(in) :: dt1
real(c_double),value,intent(in) :: dt2
real(c_double),intent(out) :: dpsi
real(c_double),intent(out) :: deps
end subroutine memc_iau_nut00a
end interface
call memc_iau_nut00a(dt1,dt2,dpsi,deps)
end subroutine
module test1
implicit none
contains
subroutine iau_nut00a (dt1,dt2,dpsi,deps)
implicit none
double precision dt1,dt2,dpsi,deps
interface
subroutine memc_iau_nut00a (dt1,dt2,dpsi,deps) bind(c)
use,intrinsic :: iso_c_binding
real(c_double),value,intent(in) :: dt1
real(c_double),value,intent(in) :: dt2
real(c_double),intent(out) :: dpsi
real(c_double),intent(out) :: deps
end subroutine memc_iau_nut00a
subroutine close_memc_connect() bind(c)
use, intrinsic :: iso_c_binding
end subroutine close_memc_connect
end interface
call memc_iau_nut00a(dt1,dt2,dpsi,deps)
end subroutine
double precision function iau_ee00a (dt1,dt2)
implicit none
double precision dt1,dt2
interface
real(c_double) function memc_iau_ee00a (dt1,dt2) bind(c)
use,intrinsic :: iso_c_binding
real(c_double),value,intent(in) :: dt1
real(c_double),value,intent(in) :: dt2
end function memc_iau_ee00a
end interface
iau_ee00a = memc_iau_ee00a(dt1,dt2)
end function
end module test1
program test
use test1
double precision adt1,adt2,adpsi,adeps,ee
adt1 = 2451547.1D00
adt2 = -1423.1D00
call iau_nut00a(adt1,adt2,adpsi,adeps)
ee = iau_ee00a (adt1,adt2)
write (*,*) adpsi,adeps,ee
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment