The gateway to Torque configuration is the command qmgr. It reads configuration directives from stdin or from a command-line argument (qmgr -c "directive").
Configuration directives are of the form
command server|queue|node [names] [attr OP value[,attr OP value,...]]
where command is one of set | unset | print | create | delete | list, names is a list of object names such as a queue name, and the attributes are various object properties. Refer to the following links for detailed information on server and queue parameters.
The directive print server (qmgr -c "print server" or qmgr -c "p s") prints the entire server configuration as the list of commands that need to be given to qmgr to reconfigure the server exactly as it is. Example:
Configuration as of 6 May 2009:
[root@licossrv4 maui]# qmgr -c 'p s' # # Create queues and set their attributes. # # # Create and define queue long # create queue long set queue long queue_type = Execution set queue long Priority = 60 set queue long max_running = 130 set queue long resources_default.nice = 5 set queue long acl_group_enable = True set queue long acl_groups = algo set queue long acl_groups += licos set queue long acl_groups += arni set queue long acl_groups += lthc set queue long acl_groups += lthi set queue long acl_groups += lcm set queue long acl_groups += student set queue long max_user_run = 150 set queue long enabled = False set queue long started = True # # Create and define queue default # create queue default set queue default queue_type = Route set queue default resources_default.neednodes = general set queue default route_destinations = short set queue default route_destinations += long set queue default route_destinations += smallmem set queue default route_destinations += night set queue default route_destinations += bertrand set queue default route_destinations += nolimit set queue default route_destinations += algo set queue default enabled = True set queue default started = True # # Create and define queue nolimit # create queue nolimit set queue nolimit queue_type = Execution set queue nolimit Priority = 60 set queue nolimit max_running = 130 set queue nolimit resources_default.neednodes = nows set queue nolimit resources_default.nice = 5 set queue nolimit acl_group_enable = True set queue nolimit acl_groups = algo set queue nolimit acl_groups += licos set queue nolimit acl_groups += arni set queue nolimit acl_groups += lthc set queue nolimit acl_groups += lthi set queue nolimit acl_groups += lcm set queue nolimit acl_groups += student set queue nolimit max_user_run = 150 set queue nolimit enabled = True set queue nolimit started = True # # Create and define queue smallmem # create queue smallmem set queue smallmem queue_type = Execution set queue smallmem Priority 
= 80 set queue smallmem max_running = 130 set queue smallmem resources_max.mem = 512mb set queue smallmem resources_default.nice = 5 set queue smallmem acl_group_enable = True set queue smallmem acl_groups = algo set queue smallmem acl_groups += licos set queue smallmem acl_groups += arni set queue smallmem acl_groups += lthc set queue smallmem acl_groups += lthi set queue smallmem acl_groups += lcm set queue smallmem acl_groups += student set queue smallmem acl_groups += 11240 set queue smallmem max_user_run = 150 set queue smallmem enabled = True set queue smallmem started = True # # Create and define queue night # create queue night set queue night queue_type = Execution set queue night Priority = 60 set queue night max_running = 130 set queue night resources_max.cput = 08:00:00 set queue night resources_default.nice = 5 set queue night acl_group_enable = True set queue night acl_groups = algo set queue night acl_groups += licos set queue night acl_groups += arni set queue night acl_groups += lthc set queue night acl_groups += lthi set queue night acl_groups += lcm set queue night acl_groups += student set queue night max_user_run = 150 set queue night enabled = False set queue night started = False # # Create and define queue short # create queue short set queue short queue_type = Execution set queue short Priority = 100 set queue short resources_max.cput = 01:00:00 set queue short resources_default.cput = 01:00:00 set queue short resources_default.nice = 5 set queue short acl_group_enable = True set queue short acl_groups = algo set queue short acl_groups += licos set queue short acl_groups += arni set queue short acl_groups += lthc set queue short acl_groups += lthi set queue short acl_groups += lcm set queue short acl_groups += student set queue short max_user_run = 100 set queue short enabled = True set queue short started = True # # Create and define queue algo # create queue algo set queue algo queue_type = Execution set queue algo Priority = 60 set queue 
algo resources_max.cput = 24:00:00 set queue algo resources_default.neednodes = algo set queue algo resources_default.nice = 5 set queue algo resources_default.walltime = 01:00:00 set queue algo acl_group_enable = False set queue algo acl_groups = algo set queue algo enabled = True set queue algo started = True # # Create and define queue bertrand # create queue bertrand set queue bertrand queue_type = Execution set queue bertrand acl_user_enable = True set queue bertrand acl_users = bertrand set queue bertrand acl_users += cangiani set queue bertrand acl_users += damir set queue bertrand resources_default.neednodes = bertrand set queue bertrand enabled = True set queue bertrand started = True # # Set server attributes. # set server scheduling = True set server managers = cangiani@*.epfl.ch set server managers += damir@*.epfl.ch set server managers += laurenzi@*.epfl.ch set server managers += root@iscsrv27.epfl.ch set server operators = cangiani@*.epfl.ch set server operators += damir@*.epfl.ch set server operators += laurenzi@*.epfl.ch set server default_queue = default set server log_events = 511 set server mail_from = adm set server query_other_jobs = True set server resources_default.cput = 01:00:00 set server resources_default.mem = 512mb set server resources_default.nodect = 1 set server resources_default.nodes = 1 set server scheduler_iteration = 600 set server node_check_rate = 600 set server tcp_timeout = 6 set server job_nanny = True set server pbs_version = 2.1.10 set server allow_node_submit = True
Maui reads its configuration from a simple text file: /usr/local/maui/maui.cfg. Here is a full list of parameters, and here is the Maui administration guide in PDF.
Current configuration as of 6 May 2009:
# maui.cfg 3.2.6p19 SERVERHOST iscsrv27.epfl.ch # primary admin must be first in list ADMIN1 root damir cangiani ADMIN2 damir cangiani ADMINHOST localhost iscsrv27 iscsrv27.epfl.ch lthipc1.epfl.ch lthipc1 # Resource Manager Definition RMTYPE[0] PBS RMHOST[0] iscsrv27.epfl.ch RMPOST[0] 15001 #RMCFG[LICOSSRV4.EPFL.CH] TYPE=PBS@RMNMHOST@ # Allocation Manager Definition AMCFG[bank] TYPE=NONE # full parameter docs at http://supercluster.org/mauidocs/a.fparameters.html # use the 'schedctl -l' command to display current configuration RMPOLLINTERVAL 00:01:00 SERVERPORT 42559 #SERVERPORT 15004 SERVERMODE NORMAL #SERVERMODE TEST # Admin: http://supercluster.org/mauidocs/a.esecurity.html LOGFILE maui.log LOGFILEMAXSIZE 100000000 LOGLEVEL 3 # Job Priority: http://supercluster.org/mauidocs/5.1jobprioritization.html QUEUETIMEWEIGHT 1 JOBPRIOACCRUALPOLICY FULLPOLICY MAXJOBQUEUEDPERUSER 32 # MAXPROCPERUSER 0 XFWEIGHT 200 # FairShare: http://supercluster.org/mauidocs/6.3fairshare.html FSPOLICY DEDICATEDPES FSDEPTH 7 FSINTERVAL 86400 FSDECAY 0.80 FSWEIGHT 20 FSUSERWEIGHT 1 FSGROUPWEIGHT 0 USERCFG[DEFAULT] FSTARGET=100 # Throttling Policies: http://supercluster.org/mauidocs/6.2throttlingpolicies.html # NONE SPECIFIED # Backfill: http://supercluster.org/mauidocs/8.2backfill.html BACKFILLPOLICY FIRSTFIT RESERVATIONPOLICY CURRENTHIGHEST # Node Allocation: http://supercluster.org/mauidocs/5.2nodeallocation.html # NODEALLOCATIONPOLICY MINRESOURCE # get always the fastest node instead NODEALLOCATIONPOLICY FASTEST # QOS: http://supercluster.org/mauidocs/7.3qos.html # QOSCFG[hi] PRIORITY=100 XFTARGET=100 FLAGS=PREEMPTOR:IGNMAXJOB # QOSCFG[low] PRIORITY=-1000 FLAGS=PREEMPTEE # Standing Reservations: http://supercluster.org/mauidocs/7.1.3standingreservations.html # SRSTARTTIME[test] 8:00:00 # SRENDTIME[test] 17:00:00 # SRDAYS[test] MON TUE WED THU FRI # SRTASKCOUNT[test] 20 # SRMAXTIME[test] 0:30:00 # Creds: http://supercluster.org/mauidocs/6.1fairnessoverview.html # USERCFG[DEFAULT] 
FSTARGET=100 # GROUPCFG[DEFAULT] FSTARGET=100 # USERCFG[john] PRIORITY=100 FSTARGET=10.0- # GROUPCFG[staff] PRIORITY=1000 QLIST=hi:low QDEF=hi # CLASSCFG[batch] FLAGS=PREEMPTEE # CLASSCFG[interactive] FLAGS=PREEMPTOR # node configurations # NODECFG[nodename] properties list # where nodename can also be 'DEFAULT', and (some of the) properties are # PARTITION=string a way to group machines. Multi CPU jobs are always # runned within the same partition. # (e.g. PARTITION=algo_cluster) It can also be used to # give exclusive use of certain machines to a given group # GROUPCFG[algo] PLIST=algo_cluster # PROCSPEED=integer the processor clock in MHz # SPEED=float the overall speed of the machine with respect to a # standard reference. 0.0 < float <= 100.0 # FEATURES=stringlist a set of "opaque" feature of the node (e.g. # FEATURES=Opteron:myrinet:fpga:matlab # MAXJOB=integer maximum number of job that can run simultaneusly # #NODECFG[DEFAULT] MAXLOAD=2.5 #NODECFG[DEFAULT] MAXJOB=2 DEFERTIME 0