We would like to know how many threads a program is using and we would like each thread to say hello to us.
Consider the following program:
#include <cstdio>
#include "omp.h"
int main()
{
printf("Thread #%d reports: This program uses %d threads.\n", omp_get_thread_num(), omp_get_num_threads());
#pragma omp parallel
printf("Hello world from thread #%d\n", omp_get_thread_num());
return 0;
}
Can you make only thread 5 report the correct number of threads?
What is the number of physical cores available on the compute nodes of Hamilton7?
The following string contains a scrambled message:
Zpv!uijol!zpvs!qbjo!boe!zpvs!ifbsucsfbl!bsf!voqsfdfefoufe!jo!uif!ijtupsz!pg!uif!xpsme-!cvu!uifo!zpv!sfbe/
This code was used for encryption:
#include <iostream>
#include <string.h>
const std::string mymessage {"/*Missing code: the original message*/"};
char* encode(const std::string message)
{
const auto sz = message.size();
char * result = static_cast<char*>(malloc(sizeof(result) * (sz+1)));
memcpy(result, message.data(), sz);
// Missing code:
// for-loop that increments each element of result by 1
return result;
}
int main()
{
std::cout << encode(mymessage) << "\n";
return 0;
}
Hint:
Use suitable pragmas and run the decryption thread-parallel.
Insert a statement in the for-loop that prints the current thread number and decryption result.
Run the code repeatedly and vary the number of threads (OMP_NUM_THREADS). What do you observe?
You are provided with an encrypted PGM type image. The following code has been used for encryption:
#define cimg_display 0 // This gets around the need to link to X11
#include "CImg.h"
#include <cstdio>
using namespace cimg_library;
int main(int argc, char * argv[]) {
// Load image.
CImg<float> img = CImg<float>(argv[1]);
const int w = img.width();
const int h = img.height();
// Encode
for (int r=0;r<300;r++)
{
for (int y=0;y<h;y++)
{
for (int x=0;x<w;x++)
{
float buf = img( (x+r+y)%w, y);
img( (x+r+y)%w, y) = img(x, y);
img(x , y) = buf;
}
}
}
// Write encoded image
img.save("encoded.pgm");
return 0;
}
Hints:
module load gcc/9.3.0
g++ decoder.cxx -I . -lpthread -fopenmp -o decoder
./decoder encoded.pgm
Hint:
Hint:
Images in Portable Gray Map (PGM) format store a single grayscale value between 0 and 255 in each pixel.
CImg is a lightweight, header-only, image library for C/C++. Among other things it allows to read, manipulate and write images. Once loaded, the individual pixels of an image can be accessed using the pixel coordinates \(0\leq x\lt w\) and \(0\leq y\lt h\), where \(w\) and \(h\) are image width and height, respectively. CImg is available here: https://cimg.eu/
The task is to sum all numbers between 1 and \(10^9\) in parallel. We can cross-check our result using the relation \(\sum\limits_{i=1}^N = \frac{N\cdot(N+1)}{2}\)
#include <cstdio>
#include "omp.h"
int main()
{
unsigned long N = 1000000000;
unsigned long SUMS[4] = {0,0,0,0};
#pragma omp parallel for
for (unsigned long i=1;i<N+1;i++)
{
SUMS[omp_get_thread_num()]+=i;
}
unsigned long result = SUMS[0] + SUMS[1] + SUMS[2] + SUMS[3];
printf("Result: %lu\n", result);
return 0;
}
int main()
{
unsigned long N = 1000000000;
unsigned long result = 0;
#pragma omp parallel
{
unsigned long temp = 0;
#pragma omp for
for (unsigned long i=1;i<N+1;i++)
{
temp += i;
}
#pragma opm atomic
result += temp;
}
printf("Result: %lu\n", result);
return 0;
}
Is this code correct? What does the atomic do and why is it necessary?
int main()
{
unsigned long N = 1000000000;
unsigned long result = 0;
#pragma omp parallel for reduction(+:result)
for (unsigned long i=1;i<N+1;i++)
{
result += i;
}
printf("Result: %lu\n", result);
return 0;
}