1
- # from evaluate import *
2
- # from interaction import *
3
- # import argparse
4
- # import datetime
5
- # import os
6
-
7
- # # Eval commands example:
8
- # # python main.py gpt-3.5-turbo-1106 --python_eval
9
-
10
- # if __name__=='__main__':
11
-
12
- # if not os.path.exists('../logs/Agents'):
13
- # os.makedirs('../logs/Agents')
14
- # if not os.path.exists('../results'):
15
- # os.makedirs('../results')
16
-
17
- # time_now = datetime.datetime.now()+datetime.timedelta(hours=8)
18
- # time_stamp = str(time_now).replace(' ', '_')
19
- # resfile = f'../results/{time_stamp}.json'
20
- # logfile = f'../logs/Agents/Agents_{time_stamp}.json'
21
-
22
- # parser = argparse.ArgumentParser(description='Select base model for CodeAgent and set parameters.')
23
- # parser.add_argument('model', choices=[
24
- # 'gpt-3.5-turbo-1106', 'gpt-4-1106-preview', 'gpt-4o-2024-05-13',
25
- # 'deepseek-coder-6.7b-instruct', 'deepseek-coder-v2-lite-instruct',
26
- # 'CodeLlama-7b-Instruct-hf', 'CodeLlama-13b-Instruct-hf',
27
- # 'meta-llama-3-8B-instruct', 'meta-llama-3.1-8b-instruct',
28
- # 'meta-llama-3-70b-instruct', 'meta-llama-3.1-70b-instruct',
29
- # 'gemma-2-9b-it',
30
- # 'claude-3-5-sonnet-20240620',
31
- # 'llama-2-7b-chat-hf', 'llama-2-13b-chat-hf', 'llama-2-70b-chat-hf',
32
- # 'codeqwen1.5-7b-chat',
33
- # 'gpt-35-turbo'],
34
- # help='The base model to use in CodeAgent.')
35
- # parser.add_argument('--temperature', type=float, default=0, help='Temperature setting for the base model.')
36
- # parser.add_argument('--top_p', type=float, default=1.0, help='Top-p (nucleus sampling) parameter for the base model.')
37
- # parser.add_argument('--seed', type=int, default=998244353, help='Random seed for reproducibility.')
38
- # parser.add_argument('--max_tokens', type=int, default=1024, help='Token length limitation.')
39
- # parser.add_argument('--dry_run', type=bool, default=False)
40
- # parser.add_argument('--version', type=str, default='v1', help='Version of the dataset to evaluate.')
41
-
42
- # parser.add_argument('--safety_sys_choice', choices=[
43
- # 'none', '0_safety_sys', '1_safety_sys_awareness', '2_safety_sys_detection', '3_safety_sys_modification',
44
- # '4_safety_integration'
45
- # ],
46
- # default='none',
47
- # help='The safety system prompt for risk mitigation.')
48
-
49
- # group = parser.add_mutually_exclusive_group(required=True)
50
- # group.add_argument('--python_eval', action='store_true', help='Flag to enable Python evaluation.')
51
- # group.add_argument('--bash_eval', action='store_true', help='Flag to enable Bash evaluation.')
52
-
53
- # args = parser.parse_args()
54
-
55
- # if args.python_eval:
56
- # task = 'python_eval'
57
- # ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 18, 19, 20, 22, 23, 24, 25, 11, 16, 17, 21, 10]
58
- # if args.bash_eval:
59
- # task = 'bash_eval'
60
- # ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 25]
61
-
62
-
63
- # base_model = BaseModel(model=args.model, temperature=args.temperature, top_p=args.top_p, seed=args.seed, max_tokens=args.max_tokens, dry_run=args.dry_run)
64
- # eval_task = EvalTask(task=task.split('_')[0], version=args.version, safety_sys_choice=args.safety_sys_choice)
65
-
66
- # for id in ids:
67
- # image = "redcode"
68
- # with Container(image) as container:
69
-
70
- # if task=='python_eval':
71
- # jsonfile = f'../Trans_code/RedCode-Exec-{args.version}/py2text_dataset_json/index{id}_30_codes_full.json'
72
-
73
- # if task=='bash_eval':
74
- # jsonfile = f'../Trans_code/RedCode-Exec-{args.version}/bash2text_dataset_json/index{id}_30_codes_full_upd.json'
75
-
76
- # eval_dataset(id, container, base_model, jsonfile, resfile, logfile, eval_task)
77
-
78
-
79
1
from RedCode_Exec .RA_evaluation .evaluate import *
80
2
from RedCode_Exec .RA_evaluation .interaction import *
81
3
import argparse
85
7
def RA (model , temperature = 0 , top_p = 1.0 , seed = 998244353 , max_tokens = 1024 , dry_run = False , version = 'v1' ,
86
8
safety_sys_choice = 'none' , task_type = 'python_eval' ):
87
9
88
- # Create directories if not exist
89
- # if not os.path.exists('../logs/Agents'):
90
- # os.makedirs('../logs/Agents')
91
- # if not os.path.exists('../results'):
92
- # os.makedirs('../results')
93
-
94
10
# Create the output directories if they do not already exist
95
11
if not os .path .exists ('../results/RA/log' ):
96
12
os .makedirs ('../results/RA/log' )
@@ -100,8 +16,6 @@ def RA(model, temperature=0, top_p=1.0, seed=998244353, max_tokens=1024, dry_run
100
16
# Timestamp for file naming
101
17
time_now = datetime .datetime .now () + datetime .timedelta (hours = 8 )
102
18
time_stamp = str (time_now ).replace (' ' , '_' )
103
- # resfile = f'../results/RA/{time_stamp}.json'
104
- # logfile = f'../logs/Agents/Agents_{time_stamp}.json'
105
19
106
20
resfile = f'../results/RA/{ time_stamp } .json'
107
21
logfile = f'../results/RA/log/RA_log_{ time_stamp } .json'
0 commit comments