X Tutup
Skip to content

Commit 16cbcaa

Browse files
committed
JavaCL: basic support for OpenCL 1.2 device fission (issue #232)
1 parent 8effbf2 commit 16cbcaa

File tree

2 files changed

+174
-0
lines changed

2 files changed

+174
-0
lines changed

Core/src/main/velocity/com/nativelibs4java/opencl/CLDevice.java

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,18 @@ public String getDriverVersion() {
633633
return infos.getString(getEntity(), CL_DRIVER_VERSION);
634634
}
635635
636+
/**
637+
* Affinity domain specified in {@link #createSubDevicesByAffinity()}, or null if the device is not a sub-device or wasn't split by affinity.
638+
* This returns part of CL_DEVICE_PARTITION_TYPE.
639+
*/
640+
public AffinityDomain getPartitionAffinityDomain() {
641+
Pointer<?> memory = infos.getMemory(getEntity(), CL_DEVICE_PARTITION_TYPE);
642+
AffinityDomain affinityDomain = AffinityDomain.getEnum(memory.getLongAtOffset(0));
643+
return affinityDomain;
644+
}
645+
646+
647+
636648
/**
637649
* OpenCL profile string. <br/>
638650
* Returns the profile name supported by the device. <br/>
@@ -922,4 +934,115 @@ public static EnumSet<QueueProperties> getEnumSet(long v) {
922934
public EnumSet<QueueProperties> getQueueProperties() {
923935
return QueueProperties.getEnumSet(infos.getIntOrLong(getEntity(), CL_DEVICE_QUEUE_PROPERTIES));
924936
}
937+
938+
/** Enums values for cl_device_affinity_domain */
939+
public enum AffinityDomain implements com.nativelibs4java.util.ValuedEnum {
940+
/** Split the device into sub-devices comprised of compute units that share a NUMA node. */
941+
NUMA(CL_DEVICE_AFFINITY_DOMAIN_NUMA),
942+
/** Split the device into sub-devices comprised of compute units that share a level 4 data cache. */
943+
L4Cache(CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE),
944+
/** Split the device into sub-devices comprised of compute units that share a level 3 data cache. */
945+
L3Cache(CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE),
946+
/** Split the device into sub-devices comprised of compute units that share a level 2 data cache. */
947+
L2Cache(CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE),
948+
/** Split the device into sub-devices comprised of compute units that share a level 1 data cache. */
949+
L1Cache(CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE),
950+
/**
951+
* Split the device along the next partitionable affinity domain. The implementation shall finde
952+
* first level along which the device or sub-device may be further subdivided in the order NUMA,
953+
* L4, L3, L2, L1, and partition the device into sub-devices comprised of compute units that share
954+
* memory subsystems at this level.
955+
*/
956+
NextPartitionable(CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE);
957+
958+
AffinityDomain(long value) { this.value = value; }
959+
long value;
960+
@Override
961+
public long value() { return value; }
962+
963+
public static AffinityDomain getEnum(long v) {
964+
return EnumValues.getEnum(v, AffinityDomain.class);
965+
}
966+
}
967+
968+
/**
969+
* Creates an array of sub-devices that each reference a non-intersecting set of compute units within this device.
970+
* Split the aggregate device into as many smaller aggregate devices as can be created, each containing n compute units. The value n is passed as the value accompanying this property. If n does not divide evenly into CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS, then the remaining compute units are not used.
971+
#documentCallsFunction("clCreateSubDevices")
972+
* @param computeUnitsForEachSubDevice Count of compute units for every subdevice.
973+
#documentEventsToWaitForAndReturn()
974+
*/
975+
public CLDevice[] createSubDevicesEqually(long computeUnitsForEverySubDevices) {
976+
return createSubDevices(pointerToLongs(
977+
CL_DEVICE_PARTITION_EQUALLY, computeUnitsForEverySubDevices, 0
978+
));
979+
}
980+
981+
/**
982+
* Creates an array of sub-devices that each reference a non-intersecting set of compute units within this device.
983+
* For each nonzero count m in the list, a sub-device is created with m compute units in it.
984+
* The number of non-zero count entries in the list may not exceed CL_DEVICE_PARTITION_MAX_SUB_DEVICES.
985+
* The total number of compute units specified may not exceed CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS.
986+
#documentCallsFunction("clCreateSubDevices")
987+
* @param computeUnitsForEachSubDevice List of counts of compute units for each subdevice.
988+
#documentEventsToWaitForAndReturn()
989+
*/
990+
public CLDevice[] createSubDevicesByCounts(long... computeUnitsForEachSubDevice) {
991+
Pointer<Long> pProperties = allocateLongs(1 + computeUnitsForEachSubDevice.length + 1 + 1);
992+
pProperties.setLongAtIndex(0, CL_DEVICE_PARTITION_BY_COUNTS);
993+
pProperties.setLongsAtOffset(8, computeUnitsForEachSubDevice);
994+
// This leaves two last longs as CL_DEVICE_PARTITION_BY_COUNTS_LIST_END=0 and 0 (end of properties).
995+
return createSubDevices(pProperties);
996+
}
997+
998+
/**
999+
* Creates an array of sub-devices that each reference a non-intersecting set of compute units within this device.
1000+
* Split the device into smaller aggregate devices containing one or more compute units that all share part of a cache hierarchy.
1001+
* The user may determine what happened by calling clGetDeviceInfo (CL_DEVICE_PARTITION_TYPE) on the sub-devices.
1002+
#documentCallsFunction("clCreateSubDevices")
1003+
* @param affinityDomain Affinity domain along which devices should be split.
1004+
#documentEventsToWaitForAndReturn()
1005+
*/
1006+
public CLDevice[] createSubDevicesByAffinity(AffinityDomain affinityDomain) {
1007+
return createSubDevices(pointerToLongs(
1008+
affinityDomain.value(), 0
1009+
));
1010+
}
1011+
1012+
/**
1013+
* Creates an array of sub-devices that each reference a non-intersecting set of compute units within this device.
1014+
#documentCallsFunction("clCreateSubDevices")
1015+
#documentEventsToWaitForAndReturn()
1016+
*/
1017+
CLDevice[] createSubDevices(Pointer<Long> pProperties) {
1018+
platform.requireMinVersionValue("clEnqueueMigrateMemObjects", 1.2);
1019+
1020+
#declareReusablePtrs()
1021+
Pointer<Integer> pNum = ptrs.int1;
1022+
error(CL.clCreateSubDevices(getEntity(), getPeer(pProperties), 0, 0, getPeer(pNum)));
1023+
int num = pNum.getInt();
1024+
1025+
Pointer<SizeT> pDevices = allocateSizeTs(num);
1026+
error(CL.clCreateSubDevices(getEntity(), getPeer(pProperties), num, getPeer(pDevices), 0));
1027+
CLDevice[] devices = new CLDevice[(int) num];
1028+
for (int i = 0; i < num; i++) {
1029+
devices[i] = new CLDevice(platform, pDevices.getSizeTAtIndex(i), true);
1030+
}
1031+
pDevices.release();
1032+
return devices;
1033+
}
1034+
1035+
/**
1036+
#documentCallsFunction("clEnqueueMigrateMemObjects")
1037+
* @param queue
1038+
#documentEventsToWaitForAndReturn()
1039+
*/
1040+
/*
1041+
public CLEvent enqueueMigrateMemObjects(CLQueue queue, CLEvent... eventsToWaitFor) {
1042+
context.getPlatform().requireMinVersionValue("clEnqueueMigrateMemObjects", 1.2);
1043+
#declareReusablePtrsAndEventsInOut()
1044+
error(CL.clEnqueueMigrateMemObjects(queue.getEntity(), getEntity(), #eventsInOutArgsRaw()));
1045+
#returnEventOut("queue")
1046+
}
1047+
*/
9251048
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* To change this template, choose Tools | Templates
3+
* and open the template in the editor.
4+
*/
5+
6+
package com.nativelibs4java.opencl;
7+
8+
import static com.nativelibs4java.util.NIOUtils.directBuffer;
9+
import static com.nativelibs4java.util.NIOUtils.get;
10+
import static com.nativelibs4java.util.NIOUtils.put;
11+
import static org.junit.Assert.*;
12+
13+
import java.nio.*;
14+
15+
import org.junit.BeforeClass;
16+
import org.junit.Test;
17+
18+
import com.nativelibs4java.test.MiscTestUtils;
19+
import com.nativelibs4java.util.NIOUtils;
20+
import org.bridj.*;
21+
import java.nio.ByteOrder;
22+
import static org.bridj.Pointer.*;
23+
import java.nio.ByteOrder;
24+
import java.util.List;
25+
import org.junit.runners.Parameterized;
26+
27+
/**
28+
*
29+
* @author ochafik
30+
*/
31+
public class DeviceTest extends AbstractCommon {
32+
public DeviceTest(CLDevice device) {
33+
super(device);
34+
}
35+
36+
@Parameterized.Parameters
37+
public static List<Object[]> getDeviceParameters() {
38+
return AbstractCommon.getDeviceParameters();
39+
}
40+
@Test
41+
public void testSplitEqually() {
42+
int computeUnits = device.getMaxComputeUnits();
43+
int subComputeUnits = 1;//computeUnits / 2;
44+
45+
CLDevice[] subDevices = device.createSubDevicesEqually(subComputeUnits);
46+
for (CLDevice subDevice : subDevices) {
47+
assertEquals(subComputeUnits, subDevice.getMaxComputeUnits());
48+
}
49+
}
50+
51+
}

0 commit comments

Comments
 (0)
X Tutup