# train_PPO_Kerr_and_gate_time_sweep.py (forked from v-sivak/quantum-control-rl)
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 8 19:22:36 2020
@author: Vladimir Sivak
"""
import os
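# GPU setup: let TensorFlow allocate GPU memory on demand and expose only GPU 0.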
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]='true'
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import numpy as np
import tensorflow as tf

from gkp.agents import PPO
from gkp.agents import actor_distribution_network_gkp

root_dir = r'E:\VladGoogleDrive\Qulab\GKP\sims\PPO\Kerr_sweep_4000\perfect_qubit_with_rotation_v2'
Kerr = np.array([1]+list(np.arange(5,55,5))) # Kerr in Hz
t_gate = 1.2e-6/np.sqrt(np.sqrt(Kerr)) # assume gate time scales as 1/(chi*alpha_c)
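
# Sweep: train one independent agent per Kerr value (1, 5, 10, ..., 50 Hz). With the
# scaling above, t_gate = 1.2e-6 * Kerr**(-1/4), so the gate time shrinks as Kerr grows.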
for i in range(len(Kerr)):
    kwargs = {'K_osc' : Kerr[i], 't_gate' : t_gate[i], 'T1_osc' : 250e-6}
    save_dir = os.path.join(root_dir,'K%d' %Kerr[i])
    to_learn = {'alpha':True, 'beta':True, 'phi':False, 'theta': True}
    simulate = 'oscillator'

    tf.compat.v1.reset_default_graph() # to reset global_step used in PPO

    PPO.train_eval(
        root_dir = save_dir,
        random_seed = 0,
        # Params for collect
        num_iterations = 4000,
        train_batch_size = 1000,
        replay_buffer_capacity = 70000,
        # Params for train
        normalize_observations = True,
        normalize_rewards = False,
        discount_factor = 1.0,
        lr = 1e-4,
        lr_schedule = None,
        num_policy_epochs = 20,
        initial_adaptive_kl_beta = 0.0,
        kl_cutoff_factor = 0,
        importance_ratio_clipping = 0.1,
        value_pred_loss_coef = 0.005,
        # Params for log, eval, save
        eval_batch_size = 600,
        eval_interval = 100,
        save_interval = 1000,
        checkpoint_interval = 5000,
        summary_interval = 100,
        # Params for environment
        simulate = simulate,
        horizon = 1,
        clock_period = 6,
        attention_step = 1,
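        # schedule (x is presumably the training iteration): 36-step episodes for the
        # first 1000 iterations, then 64-step episodes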
        train_episode_length = lambda x: 36 if x<1000 else 64,
        eval_episode_length = 64,
        reward_mode = 'pauli',
        encoding = 'hexagonal',
        quantum_circuit_type = 'v2',
        action_script = 'hexagonal_phase_estimation_symmetric_6round',
        to_learn = to_learn,
        # Policy and value networks
        ActorNet = actor_distribution_network_gkp.ActorDistributionNetworkGKP,
        actor_fc_layers = (100,50),
        value_fc_layers = (100,50),
        use_rnn = False,
        **kwargs
        )
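
# Each sweep point writes its results under its own save_dir (root_dir/K1, K5, ..., K50);
# checkpoints and summaries are presumably saved at the checkpoint_interval and
# summary_interval set above (see PPO.train_eval in gkp.agents for details).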