# ======================================== ENVIRONMENT
# Provenance of the environment this experiment ran in.
# NOTE(review): "miss" appears to be a placeholder ("missing" / not recorded)
# — presumably filled in by the runner; confirm against script.sh.

# Operating-system version used for the run.
OSVERS=miss

# Version of the Logback logging library (see logback.xml in TOCOPY).
LOGBACK_VERSION=miss

# Java path used for the run — inferred from the name JPATH; TODO confirm.
JPATH=miss

# 40-hex identifier pinning the code version — likely a git commit SHA-1;
# confirm against the repository.
LLHASH=c3fab184644a56f1783a5fbb10d20555997bfe64

# ======================================== EXPERIMENT

# Experiment identity.
LABEL=20251230-KARL
NAME=${LABEL}
DESIGNDATE=2025-01-12

# ----- ENVIRONMENT -----
# NOTE(review): these MUST be assigned before OPT/DIRPREF below — in a
# sourced shell file, double-quoted ${...} expansion happens at assignment
# time, so the previous ordering expanded them to empty strings.
NBAGENTS=10
NBINTERACTIONS=100000 # Max number of iterations before stopping the experiment for the current generation
NBGENERATIONS=39 # Number of vertical transmissions between generations
NBRUNS=5

NBPROPERTIES=6 # Number of binary properties describing objects. NBOBJECTS = 2 ** NBPROPERTIES
NBDECISIONS=4 # Number of classes or decisions to discriminate objects
KB_INIT_METHOD=random # choose in [random, consistent]
ADAPTING_AGENT_SELECTION=accuracy # choose in [accuracy, successrate]

GET_LOGS=true # Output logs in a ``results'' folder
# SEED=24

# JVM -D options handed to the experiment run.
# FIX(review): was ${NBITERATIONS}, which is defined nowhere in this file
# (the variable above is NBINTERACTIONS); -DnbIterations and DIRPREF would
# otherwise expand to the empty string. The Java property name nbIterations
# is unchanged.
OPT="-DnbAgents=${NBAGENTS} -DnbIterations=${NBINTERACTIONS} -DnbRuns=${NBRUNS} -DreportPrecRec"
LOADOPT=
# Result-directory prefix, e.g. "10-100000".
DIRPREF=${NBAGENTS}-${NBINTERACTIONS}
# Files copied alongside the results for reproducibility.
TOCOPY="params.sh script.sh notebook.ipynb logback.xml"
OUTPUT=results/

# ----- RL -----
# Reinforcement-learning method selection; the commented line below is the
# last value tried and is deliberately left disabled (default applies).
# LEARNING_METHOD=singleop5 # choose in [thompson, softmax, a3c, random+, singleopN (with N in {0,6} for operator ID)]

# ------ A3C Method -----
# Internal (hidden) dimensions of the actor and critic networks — inferred
# from the names NN_ACT_INTERN_DIM / NN_CRIT_INTERN_DIM; confirm in the
# Java classes that read them.
NN_ACT_INTERN_DIM=256
NN_CRIT_INTERN_DIM=256

# Temperature schedule — presumably for softmax action selection; verify
# against the consumer. TEMP decays toward TEMP_MIN when TEMP_DECAY_SET=true.
TEMP_DECAY_SET=false
TEMP=10.0
TEMP_MIN=0.1
TEMP_DECAY=0.99984

# Epsilon-greedy exploration schedule: starts at EPS_START, presumably
# multiplied by EPS_DECAY down to EPS_MIN — TODO confirm decay semantics.
EPS_START=0.9
EPS_MIN=0.01
EPS_DECAY=0.9984
EPS_DECAY_METHOD=time # choose in [time, srate]

# Learning rates for the actor and critic optimizers.
LR_ACT=1e-3
LR_CRIT=1e-3

# ======================================== METADATA

# Parent experiment this one is a variation of; intentionally left empty.
VARIATIONOF=
# One-sentence description of the experiment.
EXPE="Agents adapt symbolic knowledge-base using reinforcement learning to agree on decision making."

# FIX(review): value was unquoted; the embedded spaces made the shell treat
# "HYPOTHESIS=[**Learned" as an assignment prefix and try to run a command
# named "policies", so HYPOTHESIS was never actually set. The value itself
# is unchanged.
HYPOTHESIS="[**Learned policies are independent of the environment**, **Learned policies enable agents to reach consensus efficiently**]"

DATE=2025-12-30

# NOTE(review): VARIATIONOF is empty above, so this expands to
# "Variation of  with a different setting (describe)" — presumably a
# template meant to be filled in per experiment.
SETTING="Variation of ${VARIATIONOF} with a different setting (describe)"

# Java classes involved in the experiment.
CLASSES="Population, PopulatedARExperiment, PopulationAlignmentAdjustingAgent, NOOEnvironment, ActionLogger, AverageLogger, Monitor"

# ======================================== STATUS

# Free-text summary of the observed outcome of the experiment.
RESULT="After multiple interactions, agents learnt a strategy that (1) make them agree with each other, (2) they agree on accurate decisions and (3) this strategy does not depend on the environment they are in."
# One of: DESIGNED PERFORMED VALID SUBSUMED PARTLY UNCERTAIN INVALID
STATUS=PERFORMED
# Optional elaboration of STATUS; intentionally left unset here.
#STATUSDESC

# People involved; every role defaults to the performer.
PERFORMER="Richard Trézeux"
DESIGNER=${PERFORMER}
EXPERIMENTER=${PERFORMER}
ANALYST=${PERFORMER}