Machine learning in trading: theory, models, practice and algo-trading - page 1341

 

The script code, if anyone is interested - unfortunately it is posted without the CSV class (a rough stub of the class interface is sketched right after the script).

CSV *csv_Write=new CSV();
int NomerStolbca=0;

input int Set_Total=10;//Number of settings sets, 1 to 10
input string CB_Dir="Catboost_Tester";//Project directory

input string Version="catboost-0.11.1.exe";//Name of the CatBoost exe file
input int depth=6;//Tree depth
input int iterations=1000;//Maximum number of iterations (trees)
input double learning_rate=0.03;//Learning rate
input int od_wait=100;//Number of trees without improvement before training is stopped


//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
      // Create column 0 plus one column per command-line fragment
      // (the later sections use the same Size_Arr+1 pattern; Train_All below has 23 fragments)
      for(int Z=0;Z<23+1;Z++)
         csv_Write.Add_column(dt_string,Z);
      NomerStolbca=0;

string Train_All[23];
Train_All[0]=Version+" fit";
Train_All[s()]=" --learn-set train.csv";
Train_All[s()]=" --test-set test.csv";
Train_All[s()]=" --column-description %%a";
Train_All[s()]=" --has-header";
Train_All[s()]=" --delimiter ;";
Train_All[s()]=" --model-format CatboostBinary,CPP";
Train_All[s()]=" --train-dir ..\Rezultat\RS_01/result_4_%%a";
Train_All[s()]=" --depth "+depth;
Train_All[s()]=" --iterations "+iterations; 
Train_All[s()]=" --nan-mode Forbidden ";
Train_All[s()]=" --learning-rate "+learning_rate; 
Train_All[s()]=" --rsm 1 ";
Train_All[s()]=" --fold-permutation-block 1";
Train_All[s()]=" --boosting-type Ordered";
Train_All[s()]=" --l2-leaf-reg 6";
Train_All[s()]=" --loss-function Logloss:border=0.5";
Train_All[s()]=" --use-best-model";
Train_All[s()]=" --eval-metric Precision";
Train_All[s()]=" --custom-metric Logloss:border=0.5";
Train_All[s()]=" --od-type Iter";
Train_All[s()]=" --od-wait "+od_wait;
Train_All[s()]=" --random-seed ";
NomerStolbca=0;
int Size_Arr=ArraySize(Train_All);
int Seed=0;
int line=0;

for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line();
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);
         for(int Z=2;Z<23+1;Z++)
            csv_Write.Set_value(line,Z,"",false);

   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Train_All[7]=" --train-dir ..\\Rezultat\\RS_"+Seed+"/result_4_%%a";

         line=csv_Write.Add_line();
         // Write all command fragments into columns 1..23; the seed number is
         // appended to the final " --random-seed " fragment
         for(int x=0;x<Size_Arr;x++)
           {
            string value=Train_All[x];
            if(x==Size_Arr-1)
               value+=(string)Seed;
            csv_Write.Set_value(line,x+1,value,false);
           }
         NomerStolbca=0;
     }
         line=csv_Write.Add_line();
         csv_Write.Set_value(line,1,")",false);
         for(int Z=2;Z<23+1;Z++)
            csv_Write.Set_value(line,Z,"",false);
   }
   
   
     line=csv_Write.Add_line(1,false,true);
     csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_01_Train_All.txt",true,true,false,true,false);
      csv_Write.Clear_all();
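      // For reference, one generated line of _01_Train_All.txt looks roughly like this with the
      // default inputs (the fragments are joined by the tab separator, and %%a is expanded by
      // the batch FOR loop to each file name matched by (*.) - the column-description files):
      //   catboost-0.11.1.exe fit --learn-set train.csv --test-set test.csv --column-description %%a
      //     --has-header --delimiter ; --model-format CatboostBinary,CPP --train-dir ..\Rezultat\RS_1/result_4_%%a
      //     --depth 6 --iterations 1000 --nan-mode Forbidden --learning-rate 0.03 --rsm 1
      //     --fold-permutation-block 1 --boosting-type Ordered --l2-leaf-reg 6
      //     --loss-function Logloss:border=0.5 --use-best-model --eval-metric Precision
      //     --custom-metric Logloss:border=0.5 --od-type Iter --od-wait 100 --random-seed 1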

//---------
string Rezultat_Exam[9];
Rezultat_Exam[0]=Version+" calc";
Rezultat_Exam[s()]=" --model-path ..\Rezultat\RS_"+"\result_4_%%a\model.bin";//Добавлять номер директории
Rezultat_Exam[s()]=" --input-path exam.csv"; 
Rezultat_Exam[s()]=" --column-description %%a";  
Rezultat_Exam[s()]=" --has-header"; 
Rezultat_Exam[s()]=" --delimiter ;"; 
Rezultat_Exam[s()]=" --output-path ..\Rezultat\RS_"+"\result_4_%%a\output";//Добавлять номер директории
Rezultat_Exam[s()]=" --has-header"; 
Rezultat_Exam[s()]=" --prediction-type Probability";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Rezultat_Exam);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);

   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Rezultat_Exam[1]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Rezultat_Exam[6]=" --output-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\output";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Rezultat_Exam[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_02_Rezultat_Exam.txt",true,true,false,true,false);
      csv_Write.Clear_all();

//----------
string Rezultat_Test[9];
Rezultat_Test[0]=Version+" calc";
Rezultat_Test[s()]=" --model-path ..\Rezultat\RS_"+"\result_4_%%a\model.bin";//Добавлять номер директории
Rezultat_Test[s()]=" --input-path test.csv"; 
Rezultat_Test[s()]=" --column-description %%a";  
Rezultat_Test[s()]=" --has-header"; 
Rezultat_Test[s()]=" --delimiter ;"; 
Rezultat_Test[s()]=" --output-path ..\Rezultat\RS_"+"\result_4_%%a\output_test";//Добавлять номер директории
Rezultat_Test[s()]=" --has-header"; 
Rezultat_Test[s()]=" --prediction-type Probability";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Rezultat_Test);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);
  
   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Rezultat_Test[1]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Rezultat_Test[6]=" --output-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\output_test";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Rezultat_Test[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_02_Rezultat_Test.txt",true,true,false,true,false);
      csv_Write.Clear_all();

//-----------------
string Rezultat_Train[9];
Rezultat_Train[0]=Version+" calc";
Rezultat_Train[s()]=" --model-path ..\Rezultat\RS_"+"\\result_4_%%a\model.bin";//Добавлять номер директории
Rezultat_Train[s()]=" --input-path train.csv"; 
Rezultat_Train[s()]=" --column-description %%a";  
Rezultat_Train[s()]=" --has-header"; 
Rezultat_Train[s()]=" --delimiter ;"; 
Rezultat_Train[s()]=" --output-path ..\Rezultat\RS_"+"\\result_4_%%a\output_train";//Добавлять номер директории
Rezultat_Train[s()]=" --has-header"; 
Rezultat_Train[s()]=" --prediction-type Probability";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Rezultat_Train);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);

   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Rezultat_Train[1]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Rezultat_Train[6]=" --output-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\output_train";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Rezultat_Train[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_02_Rezultat_Train.txt",true,true,false,true,false);
      csv_Write.Clear_all();


//-----------------
string Metrik_Exam[8];

Metrik_Exam[0]=Version+"  eval-metrics";
Metrik_Exam[s()]=" --metrics Logloss:border=0.5,Precision,Recall,Kappa,Accuracy,BalancedAccuracy,AUC,F1,MCC";
Metrik_Exam[s()]=" --model-path ..\Rezultat\RS_\result_4_%%a\model.bin";
Metrik_Exam[s()]=" --input-path exam.csv";
Metrik_Exam[s()]=" --column-description %%a"; 
Metrik_Exam[s()]=" --has-header";
Metrik_Exam[s()]=" --delimiter ;"; 
Metrik_Exam[s()]=" --result-dir ..\Rezultat\RS_\result_4_%%a\metr\Exam";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Metrik_Exam);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);

   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Metrik_Exam[2]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Metrik_Exam[7]=" --result-dir ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\metr\\Exam";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Metrik_Exam[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_03_Metrik_Exam.txt",true,true,false,true,false);
      csv_Write.Clear_all();


//-----------------
string Metrik_Test[8];

Metrik_Test[0]=Version+"  eval-metrics";
Metrik_Test[s()]=" --metrics Logloss:border=0.5,Precision,Recall,Kappa,Accuracy,BalancedAccuracy,AUC,F1,MCC";
Metrik_Test[s()]=" --model-path ..\Rezultat\RS_\result_4_%%a\model.bin";
Metrik_Test[s()]=" --input-path test.csv";
Metrik_Test[s()]=" --column-description %%a"; 
Metrik_Test[s()]=" --has-header";
Metrik_Test[s()]=" --delimiter ;"; 
Metrik_Test[s()]=" --result-dir ..\Rezultat\RS_\result_4_%%a\metr\Test";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Metrik_Test);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);

   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Metrik_Test[2]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Metrik_Test[7]=" --result-dir ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\metr\\Test";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Metrik_Test[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_03_Metrik_Test.txt",true,true,false,true,false);
      csv_Write.Clear_all();

//-----------------
string Metrik_Train[8];

Metrik_Train[0]=Version+"  eval-metrics";
Metrik_Train[s()]=" --metrics Logloss:border=0.5,Precision,Recall,Kappa,Accuracy,BalancedAccuracy,AUC,F1,MCC";
Metrik_Train[s()]=" --model-path ..\Rezultat\RS_\result_4_%%a\model.bin";
Metrik_Train[s()]=" --input-path test.csv";
Metrik_Train[s()]=" --column-description %%a"; 
Metrik_Train[s()]=" --has-header";
Metrik_Train[s()]=" --delimiter ;"; 
Metrik_Train[s()]=" --result-dir ..\Rezultat\RS_\result_4_%%a\metr\Train";
NomerStolbca=0;
Seed=0;
Size_Arr=ArraySize(Metrik_Train);

for(int Z=0;Z<Size_Arr+1;Z++)csv_Write.Add_column(dt_string,Z,100);
for(int N=0;N<Set_Total;N++)
{
         int line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,"FOR %%a IN (*.) DO (",false);
   
   for(int i=1;i<10+1;i++)
     {
         Seed=N*10+i;
         Metrik_Train[2]=" --model-path ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\model.bin";
         Metrik_Train[7]=" --result-dir ..\\Rezultat\\RS_"+Seed+"\\result_4_%%a\\metr\\Train";
         line=csv_Write.Add_line(1,false,true);
         for(int S=1;S<Size_Arr+1;S++)
            csv_Write.Set_value(line,S,Metrik_Train[S-1],false);
     }
         line=csv_Write.Add_line(1,false,true);
         csv_Write.Set_value(line,1,")",false);
   }
      line=csv_Write.Add_line(1,false,true);
      csv_Write.Set_value(line,1,"Pause",false);

      csv_Write.data_separator='\t';
      csv_Write.Write_to_file(CB_Dir+"\\Setup\\_03_Metrik_Train.txt",true,true,false,true,false);
      csv_Write.Clear_all();

  }
  
//+------------------------------------------------------------------+
int s()
  {
   NomerStolbca++;
   return (NomerStolbca);
  }
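
Since the CSV class itself was not posted, here is a rough, hypothetical stub of its interface, reconstructed only from the calls the script makes (the extra flags of Add_line and Write_to_file are ignored here; the real class certainly differs in details). It is only meant to show what the calls do and to let the script compile if the stub is pasted above it:

// Hypothetical minimal stub of the missing CSV class, inferred from how the script uses it
enum en_data_type { dt_string, dt_double, dt_int };   // only dt_string is used by the script

class CSV
  {
private:
   string            m_cells[];        // flattened table, row-major
   int               m_cols;           // number of declared columns
   int               m_lines;          // number of added lines
public:
   ushort            data_separator;   // written between columns
                     CSV(void) { m_cols=0; m_lines=0; data_separator=';'; }
   void              Add_column(int type,int index) { m_cols++; }   // type and index are ignored in this stub
   int               Add_line(int count=1,bool flag1=false,bool flag2=false)
     {
      int old_size=m_lines*m_cols;
      m_lines+=count;
      ArrayResize(m_cells,m_lines*m_cols);
      for(int i=old_size;i<m_lines*m_cols;i++)
         m_cells[i]="";
      return(m_lines-1);               // index of the last added line
     }
   void              Set_value(int line,int col,string value,bool flag)
     {
      if(line>=0 && line<m_lines && col>=0 && col<m_cols)
         m_cells[line*m_cols+col]=value;
     }
   void              Write_to_file(string name,bool f1,bool f2,bool f3,bool f4,bool f5)
     {
      int h=FileOpen(name,FILE_WRITE|FILE_TXT|FILE_ANSI);   // path is relative to MQL5\Files
      if(h==INVALID_HANDLE)
        {
         Print("Cannot create ",name,", error ",GetLastError());
         return;
        }
      for(int l=0;l<m_lines;l++)
        {
         string row="";
         for(int c=0;c<m_cols;c++)
            row+=m_cells[l*m_cols+c]+(c<m_cols-1 ? ShortToString(data_separator) : "");
         FileWrite(h,row);
        }
      FileClose(h);
     }
   void              Clear_all(void)
     {
      ArrayFree(m_cells);
      m_lines=0;
      m_cols=0;
     }
  };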
 

I did an analysis of how parallel the models are, i.e. I plotted the points on the test sample where the activation threshold (0.5 by default) was crossed.

We can see that the models are very similar overall, but something else is surprising - there are very large time intervals where no activation took place at all. Perhaps the reason is some predictor that takes its information from the monthly bar...
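
For anyone who wants to repeat this kind of check, below is a minimal sketch of reading a CatBoost calc output file and listing the rows where the probability crosses the threshold. It is not the script used for the picture; the file path, the tab delimiter and the assumption that the class "1" probability sits in the last column all need to be adjusted to your own output.

#property script_show_inputs
input string InpFile      ="Catboost_Tester\\Rezultat\\RS_1\\result_4_cd\\output"; // hypothetical path, relative to MQL5\Files
input double InpThreshold =0.5;                                                    // activation threshold

void OnStart()
  {
   int h=FileOpen(InpFile,FILE_READ|FILE_TXT|FILE_ANSI);
   if(h==INVALID_HANDLE)
     {
      Print("Cannot open ",InpFile,", error ",GetLastError());
      return;
     }
   FileReadString(h);                                 // skip the header line
   int row=0,activations=0;
   while(!FileIsEnding(h))
     {
      string line=FileReadString(h);
      if(line=="")
         continue;
      string parts[];
      int n=StringSplit(line,'\t',parts);             // tab-separated output is assumed
      if(n<1)
         continue;
      double p=StringToDouble(parts[n-1]);            // probability of class "1" assumed in the last column
      if(p>=InpThreshold)
        {
         activations++;
         PrintFormat("row %d activated: p=%.4f",row,p);
        }
      row++;
     }
   FileClose(h);
   PrintFormat("activations: %d of %d rows (threshold %.2f)",activations,row,InpThreshold);
  }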

 
Aleksey Vyazmikin:

I did an analysis of how parallel the models are, i.e. I plotted the points on the test sample where the activation threshold (0.5 by default) was crossed.

We can see that the models are very similar overall, but something else is surprising - there are very large time intervals where no activation took place at all. Perhaps the reason is some predictor that takes its information from the monthly bar...

Are there a lot of consecutive deals on each bar? I had a similar problem with a neural network. The conclusion is similar - the higher TF dominates, and the smaller ones supplement it.
 
elibrarius:
Are there a lot of consecutive deals on each bar? I had a similar problem with a neural network. The conclusion is similar - the higher TF dominates, and the smaller ones supplement it.

Not really - the deals are just wider than the screen, and I did not post a larger chart. But yes, they do occur in clusters. Whether it is worth throwing out the upper TF, which cuts off so many entry opportunities, is the question...

 
Alexander_K:

+++

No offense to Aleksey - strike me dead if I understand a word of what he writes. Neither the goals nor the methods of achieving them are clear or substantiated. The spirit of the Teacher, who spent 15 years on neural networks and now works at a car wash, hovers over him.

Yes, we all walk under God; there is no particular difference between ending up at a car wash and being some manager who has "grown up" into a pseudo-partner, yet can just as easily get kicked out before retirement and be left at the broken trough. These days some Western-style offices have 30 "vice-presidents" per 100 employees; a middle manager is now a "vice-president" on 50 thousand a month)))) You don't know whether to laugh or cry...

Either a conscious risk and a real threat of ending up at the car wash, but with more enthusiasm, interest and adventure along the way, or an even greater risk hidden "under the carpet" of "career growth," with disappointment both in the process and in the result. If you were born a plebeian, it is better to give up hope right away; but you can try to "come to success" - you have nothing to lose anyway, and at least before you die you can say that you did everything you could and did not grovel your whole life like a bitch)))

 
Aleksey Vyazmikin:

Whether it is worth throwing out the upper TF, which cuts off so many entry opportunities, is the question...

There are two problems in this case.

 
elibrarius:

Well, either estimate it manually and decide, or run some kind of optimization.

I need to dig through the sheets and find the cause; I will do it in the next few days - it will take some digging.

For now, here are some models shaken up by bagging - they seem to have more variation, which may be interesting in terms of applying them in pairs.
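
One way to read "applying them in pairs" is to require two models to agree before counting a signal. A minimal sketch under that assumption, taking the per-row class "1" probabilities of both models as already loaded arrays:

// Count an entry signal only when both models exceed the threshold on the same row.
// p1[] and p2[] are assumed to hold the per-row class "1" probabilities already read
// from the two output files; this is only an illustration of the pairing idea.
int JointActivations(const double &p1[],const double &p2[],double threshold=0.5)
  {
   int n=(ArraySize(p1)<ArraySize(p2) ? ArraySize(p1) : ArraySize(p2));
   int joint=0;
   for(int i=0;i<n;i++)
      if(p1[i]>=threshold && p2[i]>=threshold)   // both models agree on "1"
         joint++;
   return(joint);
  }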


 

Congratulations.

Terminal: Added an API for requesting data from the MetaTrader 5 terminal from applications using the R language.

We have prepared a special MetaTraderR package. It contains the DLL for interaction between R and the MetaTrader 5 terminal, documentation, and auxiliary R files. The package is currently being registered in the CRAN repository and will soon be available for download and installation.

We look forward to the continuation.

Good luck

 
Vladimir Perervenko:

Congratulations.

Terminal: Added an API for requesting data from the MetaTrader 5 terminal from applications using the R language.

We have prepared a special MetaTraderR package. It contains the DLL for interaction between R and the MetaTrader 5 terminal, documentation, and auxiliary R files. The package is currently being registered in the CRAN repository and will soon be available for download and installation.

We look forward to the continuation.

Good luck

The developers are ignoring the question about feedback, so the probability that it will appear is not great...

 

Here is another way of representing the behavior of the models on the sample - this time by color:

TP - correct classification of "1" - green

FP - incorrect classification as "1" - red

FN - incorrect classification as "0" (a missed "1") - blue

The screenshot is large - it is more interesting to view it by clicking on it.

And the GIF, when clicked, switches between the two variants for clarity.

It can be seen that my models enter the market very little, since there is a lot of blue - I need to look into the causes of this inactivity. Perhaps I should look for other ways of stopping the training, not only based on precision. In that case, recall and precision should not be limited, but for some reason such a variant of stopping the training is not provided by the developers, which is a pity.
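
For reference, here is a minimal sketch of how the color legend above maps to confusion-matrix counts, and how precision (the metric the training is stopped on in the script) and recall follow from them - assuming the true labels and the predicted probabilities have already been loaded into arrays:

// label[] holds the true classes (0/1), prob[] the predicted probabilities of class "1";
// both are assumed to be already loaded from the sample and the model output.
void ConfusionAndMetrics(const int &label[],const double &prob[],double threshold=0.5)
  {
   int tp=0,fp=0,fn=0,tn=0;
   int n=(ArraySize(label)<ArraySize(prob) ? ArraySize(label) : ArraySize(prob));
   for(int i=0;i<n;i++)
     {
      bool predicted_one=(prob[i]>=threshold);
      if(predicted_one && label[i]==1)       tp++;   // green - correct "1"
      else if(predicted_one && label[i]==0)  fp++;   // red   - wrong "1"
      else if(!predicted_one && label[i]==1) fn++;   // blue  - missed "1"
      else                                   tn++;   // correct "0" (not colored)
     }
   double precision=(tp+fp>0 ? (double)tp/(tp+fp) : 0.0);
   double recall   =(tp+fn>0 ? (double)tp/(tp+fn) : 0.0);
   PrintFormat("TP=%d FP=%d FN=%d TN=%d  Precision=%.4f Recall=%.4f",tp,fp,fn,tn,precision,recall);
  }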