'Finding a prime factor using Cuda

I was not able to find other topics about finding the largest prime factor of a number using Cuda and I am having some issues.

#include <cuda.h>
#include <math.h>
#include <stdio.h>

__device__ int checkFactor (long long product, long long factor)
{
return product % factor == 0;
}

__global__ void factorKernel (long long product, long long limit, long long *result)
{
/******************/
/* Your code here */
/******************/

/* 1. Calculate f from thread ID    */
long long f = threadIdx.x;

/* 2. Caluclate number of thread    */
int numThreads = blockIdx.x * blockDim.x;

/* 3. While f is within limit       */
/* 4.   Check whether f is a factor */
/* 5.   If yes, write f to answer   */
/* 6.   Increment f appropriately   */

while(f < limit)
{
    if(checkFactor(product,f))
    {
        result = &f;
    }
    f += numThreads;
}

}

long long factor (long long product)
{
if (product % 2 == 0)
{
    return 2;
}
long long limit = (long long) sqrt ((double) product);

long long result = 1;
long long *dResult;

/******************/
/* Your code here */
/******************/
dim3 gridDim(256);      /* Create 256 blocks */
dim3 blockDim(256);     /* Create 256 threads per block */

/* 1. Allocate memory for dResult   */
cudaMalloc((void**) &dResult, sizeof(dResult));

/* 2. Memcpy result into dResult    */
cudaMemcpy(dResult, &result, sizeof(result), cudaMemcpyHostToDevice);

/* 3. Launch the kernel             */
factorKernel<<<gridDim, blockDim>>>(product, limit, dResult);

/* 4. Memcpy dResult back to result */
cudaMemcpy(&result, dResult, sizeof(dResult), cudaMemcpyDeviceToHost);

/* 5. Free dResult                  */   
cudaFree(dResult);

return result;
}

int main (int argc, char **argv)
{
long long product = atoll (argv [1]);   /* convert argument to long long */
long long f = factor (product);     /* call the factor function */

if (f == 1)
{
    printf ("%ld is a prime number.\n", product);
}
else
{
    printf ("%ld has a prime factor of %ld.\n", product, f);
}
return 0;
}

What this program was suppose to do is check if the threadIdx.x is within the calculated limit. If it was it would then use that threadIdx.x to checkFactor. If that treadIdx.x was a factor I wanted to set result equal to that threadIdx.x and then that would be a prime factor for the number.

Compiled nvcc -o pfactor pfactor.cu

Executed: ./pfactor 11010010001

Expected Result: 23

Actual ~program hangup

I am not sure why the program doesnt stop running.



Solution 1:[1]

Why are you setting result to a reference to f? The &f sets result to the address of f which is not what you want.

Switch:

    result = &f;

To:

   *result = f;

Other generic question, can you compile the CUDA examples?

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1