@MiniKnight they are moving slightly as I get better numbers and re-test some of the nodes with the thread count set to L3 cache (MB) / 2.
I'm running it on Gentoo (with NUMA enabled), but it should work similarly on Ubuntu. @jim any tips on getting that set up on Ubuntu? I will try to do it in Docker this weekend and see how it performs on larger machines.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b377c22..464cb63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE RELEASE)
endif()
-set(CMAKE_C_FLAGS "-DNDEBUG -march=westmere -O3 -m64 -s")
+set(CMAKE_C_FLAGS "-DNDEBUG -march=native -mtune=native -Ofast -funroll-loops -m64 -s")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -std=c++11")
set(CMAKE_EXE_LINKER_FLAGS_RELSEASE "")
diff --git a/config.txt b/config.txt
index 23f5dc3..e29c37d 100644
--- a/config.txt
+++ b/config.txt
@@ -3,7 +3,7 @@
* here is the size of your L3 cache divided by 2. Intel mid-to-high end desktop processors have 2MB of L3
* cache per physical core. Low end cpus can have 1.5 or 1 MB while Xeons can have 2, 2.5 or 3MB per core.
*/
-"cpu_thread_num" : 2,
+"cpu_thread_num" : 20,
/*
* Thread configuration for each thread. Make sure it matches the number above.
@@ -22,8 +22,26 @@
*
*/
"cpu_threads_conf" : [
- { "low_power_mode" : false, "no_prefetch" : false, "affine_to_cpu" : 0 },
- { "low_power_mode" : false, "no_prefetch" : false, "affine_to_cpu" : 1 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 0 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 1 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 2 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 3 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 4 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 5 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 6 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 7 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 8 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 9 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 10 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 11 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 12 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 13 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 14 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 15 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 16 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 17 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 24 },
+ { "low_power_mode" : false, "no_prefetch" : true, "affine_to_cpu" : 25 },
],
sudo sysctl -w vm.nr_hugepages=128
YourUsername soft memlock 262144
YourUsername hard memlock 262144
cmake .
make
while true; do ./bin/xmr-stak-cpu; done;
Also, it's based on C++11, so that is probably one reason it's fast.
Default dev donation
By default the miner will donate 1% of the hashpower (1 minute in 100 minutes) to my pool. If you want to change that, edit donate-level.h before you build the binaries.
If you want to donate directly to support further development, here is my wallet
4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk
Yes, 12 of 16 threads. Anything more or less drops below 200 H/s.
@cafcwest great results! So you are using 12 of 16 threads per node? That is a great result. Which version are you using?
Also, how close is your KH/s in the console to your estimated earnings? I think I have an aggregate of 23 KH/s but am seeing returns of more like 17 KH/s. I am not seeing lost shares in the logs/console. The moneropool.com UI seems to show 23 KH/s as well. Returns are low enough that I was thinking of just setting up a pool myself to see if that would help.