python value iteration算法玩倒立摆(inverted pendulum)
生活随笔
收集整理的這篇文章主要介紹了
python value iteration算法玩倒立摆(inverted pendulum)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
最近用value iteration的方法實現了一下倒立擺,看一下效果:
pendulum
我這里分享一下我的實現,倒立擺的代碼為:
# a few packages we need to importimport numpy as np import matplotlib.pyplot as plt import matplotlib import matplotlib.animation as animation import IPython class Pendulum:"""This class describes an inverted pendulum and provides some helper functions"""def __init__(self):"""constructor of the class"""#gravity constantself.g=9.81# number of dimensions (angle and angular velocity)self.state_dims = 2# the maximum velocityself.vmax = 6.# the range of allowable statesself.state_range = np.array([[0, 2*np.pi],[-self.vmax, self.vmax]])#simulation stepself.delta_t = 0.1# internal integration stepself._internaldt = 0.01self._integration_ratio = int(round(self.delta_t / self._internaldt))def next_state(self,x,u):"""This function integrates the pendulum for one step of self.delta_t secondsInputs:x: state of the pendulum (x,v) as a 2D numpy arrayu: control as a scalarOutput:the state of the pendulum as a 2D numpy array at the end of the integration"""x_next = x[0]v_next = x[1]for i in range(self._integration_ratio):xx_next = (x_next + self._internaldt * v_next)%(2*np.pi)v_next = np.clip(v_next + self._internaldt * (u-self.g*np.sin(x_next)), -self.vmax, self.vmax)x_next = xx_nextreturn np.array([x_next,v_next])def simulate(self, x0, policy, T):"""This function simulates the pendulum for T seconds from initial state x0 using a policy(policy is called as policy(x) and returns one control)Inputs:x0: the initial conditions of the pendulum as a 2D array (angle and velocity)T: the time to integrate forOutput:x (2D array) and u (1D array) containing the time evolution of states and control"""horizon_length = int(T/self.delta_t)x=np.empty([2, horizon_length+1])x[:,0] = x0u=np.empty([horizon_length])for i in range(horizon_length):u[i] = policy(x[:,i])x[:,i+1] = self.next_state(x[:,i], u[i])return x, udef animate_robot(self, x, dt = 0.01):"""This function makes an animation showing the behavior of the pendulumtakes as input the result of a simulation - dt is the sampling time (0.1s normally)"""# here we check if we need to down-sample the data for display#downsampling (we want 100ms DT or higher)min_dt = 0.1if(dt < min_dt):steps = int(min_dt/dt)use_dt = int(min_dt * 1000)else:steps = 1use_dt = int(dt * 1000)plotx = x[:,::steps]fig = matplotlib.figure.Figure(figsize=[6,6])matplotlib.backends.backend_agg.FigureCanvasAgg(fig)ax = fig.add_subplot(111, autoscale_on=False, xlim=[-1.3,1.3], ylim=[-1.3,1.3])ax.grid()list_of_lines = []#create the cart poleline, = ax.plot([], [], 'k', lw=2)list_of_lines.append(line)line, = ax.plot([], [], 'o', lw=2)list_of_lines.append(line)cart_height = 0.25def animate(i):for l in list_of_lines: #reset all linesl.set_data([],[])x_pend = np.sin(plotx[0,i])y_pend = -np.cos(plotx[0,i])list_of_lines[0].set_data([0., x_pend], [0., y_pend])list_of_lines[1].set_data([x_pend, x_pend], [y_pend, y_pend])return list_of_linesdef init():return animate(0)ani = animation.FuncAnimation(fig, animate, np.arange(0, len(plotx[0,:])),interval=use_dt, blit=True, init_func=init)plt.close(fig)plt.close(ani._fig)IPython.display.display_html(IPython.core.display.HTML(ani.to_html5_video()))value iteration
定義配置和輔助函數
nq=50 nv=50 nu = 3 v_max = 6 u_max=5 # create lookup tables for discretized states u_table = np.linspace(-u_max, u_max, nu) q_table = np.linspace(0., 2*np.pi, nq, endpoint=False) v_table = np.linspace(-v_max, v_max, nv)num_states = nq * nvrobot = Pendulum()def get_index(x):ind_q = np.argmin((x[0]-q_table)**2)ind_v = np.argmin((x[1]-v_table)**2)return ind_q + ind_v*nqdef get_states(index):iv,ix = np.divmod(index, nq)return np.array([q_table[ix], v_table[iv]]) next_state_index = np.empty([num_states, nu], dtype=np.int32) for i in range(num_states):for k in range(nu):x_next = robot.next_state(get_states(i),u_table[k])next_state_index[i,k] = get_index(x_next)定義代價函數
def cost(x,u):"""a cost function for the inverted pendulum"""return (x[0]-np.pi)**2 + 0.01*x[1]**2 + 0.0001*u**2value iteration的實現
class ValueIteration:"""This class is used to implement value iteration and store the state of the value function and policyas we iterate"""def __init__(self, model,num_states, cost, discount_factor=0.99):"""receives as input a pendulum and cost function and potentially a discount factor"""# value function stored as a 1D array (indexed as we indexed states in pendulum)self.num_states=num_statesself.value_function = np.zeros([self.num_states])# we also store the policy similarlyself.policy = np.zeros([self.num_states])# references to the pendulum and cost functionself.model = modelself.cost = cost#discount factor for costself.gamma = discount_factordef iterate(self,num_states,nu,u_table, num_iter=1):"""the main iteration of value iterationnum_iter: maximum number of iterations to be performed. If after an iteration the value function does not change (e.g. less thant 10e-5)the function returns and print success"""for i in range(num_iter):J_new = self.value_function.copy()for j in range(num_states):#for each possible control input we compute the costr = np.zeros([nu])for l in range(nu):# the current states and controlx = get_states(j)u = u_table[l]# the index for the next statenext_index = next_state_index[j,l]#compute the costr[l] = self.cost(x, u) + self.gamma*self.value_function[next_index]# we take the smallest cost value to update the value functionJ_new[j] = np.min(r)#here we also store the policy (so we have it for later)self.policy[j] = u_table[np.argmin(r)]#we update the current value function if there is any change otherwise we are doneif ((self.value_function-J_new)**2 < 10e-2).all():print("CONVERGED after iteration " + str(i))breakelse:self.value_function = J_new.copy()訓練
# we instanciate a value iteration object for a pendulum model and a cost function value_iteration = ValueIteration(robot,num_states, cost)# we run the iterations (with maximum number 2000). value_iteration.iterate(num_states,nu,u_table,2000)可視化
my_policy=value_iteration.policydef policy(x): # print(x)index=get_index(x)return my_policy[index] def plot_results(robot, value_function, policy,my_policy, animate=True):"""This function plots the results. It displays the value function, the policy for all states.Then it integrates the pendulum from state [0,0] and displays the states and control as a function of timeFinally it shows an animation of the result"""x0 = np.array([0.,0.])x, u = robot.simulate(x0, policy, 20)if animate:robot.animate_robot(x, robot.delta_t) plot_results(robot, value_iteration.value_function, policy,my_policy, animate=True)后續把跑的視頻擺出來,哈哈哈,今天分享就到這里,這是實踐代碼,理論部分就不講了哈。
總結
以上是生活随笔為你收集整理的python value iteration算法玩倒立摆(inverted pendulum)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 用Python批量从本地导数据到post
- 下一篇: C#--扁平化窗体UI