Skip to content

Commit ce0a94e

Browse files
committed Dec 10, 2024
update environment
1 parent 23a89c9 commit ce0a94e

File tree

12 files changed

+78
-174
lines changed

12 files changed

+78
-174
lines changed
 

‎environment.yml

+6 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: redcode
1+
name: agent
22
channels:
33
- defaults
44
dependencies:
@@ -37,19 +37,24 @@ dependencies:
3737
- zlib=1.2.13=h5eee18b_0
3838
- zstd=1.5.5=hc292b87_2
3939
- pip:
40+
- accelerate==1.0.1
4041
- blinker==1.8.1
4142
- certifi==2024.2.2
4243
- evdev==1.7.1
4344
- flask==3.0.3
4445
- idna==3.7
4546
- itsdangerous==2.2.0
47+
- jiter==0.8.2
4648
- jsonpickle==3.0.4
49+
- openai==0.28.1
4750
- pynput==1.7.7
4851
- pyparsing==3.1.2
4952
- pyperclip==1.8.2
5053
- python-xlib==0.33
5154
- pyyaml==6.0.1
55+
- safetensors==0.4.5
5256
- scapy==2.5.0
5357
- six==1.16.0
5458
- typing-extensions==4.11.0
5559
- werkzeug==3.0.2
60+
prefix: /data2/common/chengquan/anaconda3/envs/agent

‎evaluation/RedCode_Exec/CA_evaluation/CA.py

+22 -10
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def save_result(eval_log_json):
104104

105105
for input_type in all_input_types:
106106
try:
107-
eval_result_folder= os.path.join("results",input_type)
107+
eval_result_folder= os.path.join("../results/CA",input_type)
108108
os.makedirs(eval_result_folder, exist_ok=True)
109109

110110
prompt=prompt_type[input_type]
@@ -151,7 +151,7 @@ def save_result(eval_log_json):
151151
print(f" ----- {index} done")
152152

153153

154-
def CA(model_name="CodeActAgent-Mistral-7b-v0.1", max_exec=3, openai_base_url="http://localhost:8080/v1",
154+
def CA(model_name="CodeActAgent-Mistral-7b-v0.1", max_exec=3, openai_base_url="http://localhost:8088/v1",
155155
dry_run=False, ids=2, ids_start=-1, ids_end=-1, max_token=512, folder_prefix=""):
156156

157157
# Set up ids based on input arguments
@@ -170,17 +170,29 @@ def CA(model_name="CodeActAgent-Mistral-7b-v0.1", max_exec=3, openai_base_url="h
170170
generator = Generator(model_name, openai_base_url)
171171
agent = Agent(generator, container, conv_id=time_stamp, max_new_tokens=max_token)
172172

173-
res_folder = os.path.join(f"results{folder_prefix}", model_name)
174-
log_folder = os.path.join(f"logs{folder_prefix}", model_name)
173+
# res_folder = os.path.join(f"results{folder_prefix}", model_name)
174+
# log_folder = os.path.join(f"logs{folder_prefix}", model_name)
175175

176-
os.makedirs(res_folder, exist_ok=True)
177-
os.makedirs(log_folder, exist_ok=True)
176+
# os.makedirs(res_folder, exist_ok=True)
177+
# os.makedirs(log_folder, exist_ok=True)
178178

179179
# Evaluate datasets for each id
180+
# Get the current file's directory
181+
current_file_path = os.path.dirname(os.path.abspath(__file__))
182+
dataset_base_path = os.path.join(current_file_path, '../../../dataset/RedCode-Exec')
183+
184+
# Set dataset JSON file path based on task type
185+
186+
187+
180188
for id in ids_list:
181-
resfile = os.path.join(res_folder, f'idx{id}_{time_stamp}.json')
182-
logfile = os.path.join(log_folder, f'idx{id}.json')
183-
jsonfile = f'../Trans_code/py2text_dataset_json/index{id}_30_codes_full.json'
189+
# resfile = os.path.join(res_folder, f'idx{id}_{time_stamp}.json')
190+
# logfile = os.path.join(log_folder, f'idx{id}.json')
191+
# jsonfile = f'../Trans_code/py2text_dataset_json/index{id}_30_codes_full.json'
192+
193+
resfile = f'../results/CA/{time_stamp}.json'
194+
logfile = f'../results/CA/log/RA_log_{time_stamp}.json'
195+
jsonfile = os.path.join(dataset_base_path, f'py2text_dataset_json/index{id}_30_codes_full.json')
184196

185197
eval_dataset(id, agent, jsonfile, resfile, logfile, max_exec, dry_run)
186198

@@ -189,7 +201,7 @@ def CA(model_name="CodeActAgent-Mistral-7b-v0.1", max_exec=3, openai_base_url="h
189201
# parser = argparse.ArgumentParser(description='Select base model for CodeAgent and set parameters.')
190202
# parser.add_argument('--model_name',default= "CodeActAgent-Mistral-7b-v0.1", help='The base model to use in CodeAgent.')
191203
# parser.add_argument('--max_exec', type=int, default=3)
192-
# parser.add_argument('--openai_base_url', type=str, default="http://localhost:8080/v1")
204+
# parser.add_argument('--openai_base_url', type=str, default="http://localhost:8088/v1")
193205
# parser.add_argument('--dry_run', type=bool, default=False)
194206
# parser.add_argument('--ids', type=int, default=2)
195207
# parser.add_argument('--ids_start', type=int, default=-1)

‎evaluation/RedCode_Exec/CA_evaluation/codeact_agent.py

+8 -4
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,14 @@
1414
from typing import List, Dict
1515
from datetime import datetime
1616

17-
# if openai.__version__ >= "0.28.1":
18-
# raise RuntimeError(
19-
# "Please use the compatbile version of openai (<=0.28.1) to use this script."
20-
# )
17+
print("OpenAI Path:", openai.__file__)
18+
print("OpenAI Version:", openai.__version__)
19+
20+
21+
if openai.__version__ > "0.28.1":
22+
raise RuntimeError(
23+
"Please use the compatbile version of openai (<=0.28.1) to use this script."
24+
)
2125

2226

2327
logging.basicConfig(level=logging.INFO)

‎evaluation/RedCode_Exec/CA_evaluation/run.sh

-15
This file was deleted.

‎evaluation/RedCode_Exec/RA_evaluation/RA.py

-86
Original file line number | Diff line number | Diff line change
@@ -1,81 +1,3 @@
1-
# from evaluate import *
2-
# from interaction import *
3-
# import argparse
4-
# import datetime
5-
# import os
6-
7-
# # Eval commands example:
8-
# # python main.py gpt-3.5-turbo-1106 --python_eval
9-
10-
# if __name__=='__main__':
11-
12-
# if not os.path.exists('../logs/Agents'):
13-
# os.makedirs('../logs/Agents')
14-
# if not os.path.exists('../results'):
15-
# os.makedirs('../results')
16-
17-
# time_now = datetime.datetime.now()+datetime.timedelta(hours=8)
18-
# time_stamp = str(time_now).replace(' ', '_')
19-
# resfile = f'../results/{time_stamp}.json'
20-
# logfile = f'../logs/Agents/Agents_{time_stamp}.json'
21-
22-
# parser = argparse.ArgumentParser(description='Select base model for CodeAgent and set parameters.')
23-
# parser.add_argument('model', choices=[
24-
# 'gpt-3.5-turbo-1106', 'gpt-4-1106-preview', 'gpt-4o-2024-05-13',
25-
# 'deepseek-coder-6.7b-instruct', 'deepseek-coder-v2-lite-instruct',
26-
# 'CodeLlama-7b-Instruct-hf', 'CodeLlama-13b-Instruct-hf',
27-
# 'meta-llama-3-8B-instruct', 'meta-llama-3.1-8b-instruct',
28-
# 'meta-llama-3-70b-instruct', 'meta-llama-3.1-70b-instruct',
29-
# 'gemma-2-9b-it',
30-
# 'claude-3-5-sonnet-20240620',
31-
# 'llama-2-7b-chat-hf', 'llama-2-13b-chat-hf', 'llama-2-70b-chat-hf',
32-
# 'codeqwen1.5-7b-chat',
33-
# 'gpt-35-turbo'],
34-
# help='The base model to use in CodeAgent.')
35-
# parser.add_argument('--temperature', type=float, default=0, help='Temperature setting for the base model.')
36-
# parser.add_argument('--top_p', type=float, default=1.0, help='Top-p (nucleus sampling) parameter for the base model.')
37-
# parser.add_argument('--seed', type=int, default=998244353, help='Random seed for reproducibility.')
38-
# parser.add_argument('--max_tokens', type=int, default=1024, help='Token length limitation.')
39-
# parser.add_argument('--dry_run', type=bool, default=False)
40-
# parser.add_argument('--version', type=str, default='v1', help='Version of the dataset to evaluate.')
41-
42-
# parser.add_argument('--safety_sys_choice', choices=[
43-
# 'none', '0_safety_sys', '1_safety_sys_awareness', '2_safety_sys_detection', '3_safety_sys_modification',
44-
# '4_safety_integration'
45-
# ],
46-
# default='none',
47-
# help='The safety system prompt for risk mitigation.')
48-
49-
# group = parser.add_mutually_exclusive_group(required=True)
50-
# group.add_argument('--python_eval', action='store_true', help='Flag to enable Python evaluation.')
51-
# group.add_argument('--bash_eval', action='store_true', help='Flag to enable Bash evaluation.')
52-
53-
# args = parser.parse_args()
54-
55-
# if args.python_eval:
56-
# task = 'python_eval'
57-
# ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 18, 19, 20, 22, 23, 24, 25, 11, 16, 17, 21, 10]
58-
# if args.bash_eval:
59-
# task = 'bash_eval'
60-
# ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23, 25]
61-
62-
63-
# base_model = BaseModel(model=args.model, temperature=args.temperature, top_p=args.top_p, seed=args.seed, max_tokens=args.max_tokens, dry_run=args.dry_run)
64-
# eval_task = EvalTask(task=task.split('_')[0], version=args.version, safety_sys_choice=args.safety_sys_choice)
65-
66-
# for id in ids:
67-
# image = "redcode"
68-
# with Container(image) as container:
69-
70-
# if task=='python_eval':
71-
# jsonfile = f'../Trans_code/RedCode-Exec-{args.version}/py2text_dataset_json/index{id}_30_codes_full.json'
72-
73-
# if task=='bash_eval':
74-
# jsonfile = f'../Trans_code/RedCode-Exec-{args.version}/bash2text_dataset_json/index{id}_30_codes_full_upd.json'
75-
76-
# eval_dataset(id, container, base_model, jsonfile, resfile, logfile, eval_task)
77-
78-
791
from RedCode_Exec.RA_evaluation.evaluate import *
802
from RedCode_Exec.RA_evaluation.interaction import *
813
import argparse
@@ -85,12 +7,6 @@
857
def RA(model, temperature=0, top_p=1.0, seed=998244353, max_tokens=1024, dry_run=False, version='v1',
868
safety_sys_choice='none', task_type='python_eval'):
879

88-
# Create directories if not exist
89-
# if not os.path.exists('../logs/Agents'):
90-
# os.makedirs('../logs/Agents')
91-
# if not os.path.exists('../results'):
92-
# os.makedirs('../results')
93-
9410
# Create directories if not exist
9511
if not os.path.exists('../results/RA/log'):
9612
os.makedirs('../results/RA/log')
@@ -100,8 +16,6 @@ def RA(model, temperature=0, top_p=1.0, seed=998244353, max_tokens=1024, dry_run
10016
# Timestamp for file naming
10117
time_now = datetime.datetime.now() + datetime.timedelta(hours=8)
10218
time_stamp = str(time_now).replace(' ', '_')
103-
# resfile = f'../results/RA/{time_stamp}.json'
104-
# logfile = f'../logs/Agents/Agents_{time_stamp}.json'
10519

10620
resfile = f'../results/RA/{time_stamp}.json'
10721
logfile = f'../results/RA/log/RA_log_{time_stamp}.json'

‎evaluation/RedCode_Exec/RA_evaluation/ReAct.py

+9 -18
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
from RedCode_Exec.RA_evaluation.interaction import *
2-
from openai import OpenAI
2+
# from openai import OpenAI
33
import docker
44
import re
5-
from openai import OpenAIError, RateLimitError, APIError
5+
# from openai import OpenAIError, RateLimitError, APIError
6+
import openai
7+
from openai.error import OpenAIError, RateLimitError, APIError # Import the correct error classes
8+
69
import time
710
from transformers import AutoTokenizer, AutoModelForCausalLM
811
import torch
@@ -151,15 +154,16 @@ def memory_clean_repetitive_patterns(memory):
151154
attempt = 0
152155
while attempt < max_retries:
153156
try:
154-
response = base_model.client.chat.completions.create(
157+
response = openai.ChatCompletion.create(
155158
model=base_model.model,
156159
temperature=base_model.temperature,
157160
top_p=base_model.top_p,
158-
seed=base_model.seed,
159161
max_tokens=base_model.max_tokens,
160162
messages=memory
161163
)
162-
answer = response.choices[0].message.content
164+
165+
# Access the generated content
166+
answer = response['choices'][0]['message']['content']
163167
return answer
164168
except (OpenAIError, RateLimitError, APIError) as e:
165169
attempt += 1
@@ -169,19 +173,6 @@ def memory_clean_repetitive_patterns(memory):
169173
time.sleep(wait_time)
170174
raise Exception(f"Failed to get a response from the API after {max_retries} attempts.")
171175

172-
# def generate_response():
173-
# response = client.chat.completions.create(
174-
# model="gpt-3.5-turbo",
175-
# # model="gpt-4-1106-preview",
176-
# temperature=0,
177-
# messages=[
178-
# AGENT_SYS,
179-
# {"role": "user", "content": f"Prompt: {prompt}\n\nLanguage: {language}\n\nCode:```\n{code}\n```"}
180-
# ]
181-
# )
182-
# json_data = response.choices[0].message.content
183-
# return json_data
184-
185176
def response2json_default(data):
186177
# Transfer the string input into valid json format or None
187178
if "think" not in data.lower():

‎evaluation/RedCode_Exec/RA_evaluation/evaluate.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from RedCode_Exec.RA_evaluation.interaction import *
22
from RedCode_Exec.evaluation import evaluation
33
from RedCode_Exec.RA_evaluation.ReAct import ReAct_interaction,AGENT_SYS
4-
from openai import OpenAI
54
from collections import Counter
65
import json
76
import datetime

‎evaluation/RedCode_Exec/RA_evaluation/interaction.py

+23 -31
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@
66

77
import docker
88
import torch
9-
from openai import AzureOpenAI, OpenAI
9+
# from openai import AzureOpenAI, OpenAI
10+
import openai
1011
from transformers import AutoModelForCausalLM, AutoTokenizer
1112

1213
from .sysprompt import (SAFETY_SYS_SUFFIX0, SAFETY_SYS_SUFFIX1,
@@ -37,10 +38,10 @@ def __init__(self, model, temperature, top_p, seed, max_tokens, dry_run):
3738
torch_dtype=torch.bfloat16,
3839
device_map="auto"
3940
)
40-
if model=='CodeLlama-7b-Instruct-hf':
41-
self.client = OpenAI(base_url="http://localhost:7878/v1", api_key='a_demo_api_key')
42-
if model=='CodeLlama-13b-Instruct-hf':
43-
self.client = OpenAI(base_url="http://localhost:7879/v1", api_key='a_demo_api_key')
41+
# if model=='CodeLlama-7b-Instruct-hf':
42+
# self.client = OpenAI(base_url="http://localhost:7878/v1", api_key='a_demo_api_key')
43+
# if model=='CodeLlama-13b-Instruct-hf':
44+
# self.client = OpenAI(base_url="http://localhost:7879/v1", api_key='a_demo_api_key')
4445
if model=='meta-llama-3-8B-instruct':
4546
self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/meta-llama-3-8B-instruct")
4647
self.llama3 = AutoModelForCausalLM.from_pretrained(
@@ -110,27 +111,14 @@ def __init__(self, model, temperature, top_p, seed, max_tokens, dry_run):
110111
api_key = os.environ['ANTHROPIC_API_KEY']
111112
self.client = anthropic.Anthropic(api_key=api_key)
112113

113-
if 'gpt' in model:
114-
if model=='gpt-4o-2024-05-13':
115-
api_key = os.environ['OPENAI_PROJ_API_KEY']
116-
self.client = OpenAI(api_key=api_key)
117-
elif model=='gpt-35-turbo':
118-
deploymentName = 'gpt-35-turbo'
119-
endpoint = 'redmond/interactive'
120-
121-
API_KEY = os.environ['OPENAI_AZURE_API_KEY']
122-
API_VERSION = '2024-02-01'
123-
BASE_URL = 'https://trapi.research.microsoft.com/' + endpoint
124-
DEPLOYMENT_ID = deploymentName
125-
126-
self.client = AzureOpenAI(
127-
api_key=API_KEY,
128-
api_version=API_VERSION,
129-
azure_endpoint=BASE_URL,
130-
azure_deployment=DEPLOYMENT_ID
131-
)
132-
else:
133-
self.client = OpenAI(base_url="https://tnrllmproxy.azurewebsites.net/v1")
114+
# if 'gpt' in model:
115+
# if model == 'gpt-4o':
116+
# api_key = os.environ['OPENAI_PROJ_API_KEY']
117+
# self.client = OpenAI(api_key=api_key)
118+
# elif model == 'gpt-35-turbo':
119+
# self.client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
120+
# else:
121+
# self.client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
134122

135123
def generate_deepseek(self, messages):
136124
inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.deepseek.device)
@@ -219,14 +207,16 @@ def generate_claude(self, messages, max_retries=10, backoff_factor=1):
219207
if not system_prompt:
220208
while attempt < max_retries:
221209
try:
222-
response = self.client.messages.create(
210+
response = openai.ChatCompletion.create(
223211
model=self.model,
224212
temperature=self.temperature,
225213
top_p=self.top_p,
226214
max_tokens=self.max_tokens,
227215
messages=new_messages
228216
)
229-
return response.content[0].text
217+
218+
# Accessing the response content (text)
219+
return response['choices'][0]['message']['content']
230220
except Exception as e:
231221
attempt += 1
232222
wait_time = backoff_factor * (2 ** attempt)
@@ -235,15 +225,17 @@ def generate_claude(self, messages, max_retries=10, backoff_factor=1):
235225
else:
236226
while attempt < max_retries:
237227
try:
238-
response = self.client.messages.create(
228+
response = openai.ChatCompletion.create(
239229
model=self.model,
240230
temperature=self.temperature,
241231
top_p=self.top_p,
242-
system=system_prompt,
243232
max_tokens=self.max_tokens,
244233
messages=new_messages
245234
)
246-
return response.content[0].text
235+
236+
# Accessing the response content (text)
237+
return response['choices'][0]['message']['content']
238+
247239
except Exception as e:
248240
attempt += 1
249241
wait_time = backoff_factor * (2 ** attempt)

0 commit comments

Comments
 (0)
Please sign in to comment.