diff --git a/README.md b/README.md index 45f3cc4..e4e62f4 100644 --- a/README.md +++ b/README.md @@ -1 +1,31 @@ -# YAO-agent \ No newline at end of file +# YAO-agent + + +```bash +bin/kafka-topics.sh \ + --describe \ + --zookeeper zookeeper_node1:2181,zookeeper_node2:2181,zookeeper_node3:2181 \ + --topic yao +``` + +```bash +bin/kafka-topics.sh \ + --create \ + --zookeeper zookeeper_node1:2181,zookeeper_node2:2181,zookeeper_node3:2181 \ + --replication-factor 3 \ + --partitions 1 \ + --topic yao +``` + +```bash +bin/kafka-console-consumer.sh \ + --bootstrap-server kafka_node1:9091,kafka_node2:9092,kafka_node3:9093 \ + --topic yao \ + --from-beginning +``` + +```bash +bin/kafka-console-producer.sh \ + --broker-list kafka_node1:9091,kafka_node2:9092,kafka_node3:9093 \ + --topic yao +``` \ No newline at end of file diff --git a/executor.py b/executor.py index 65e5f15..f01073d 100644 --- a/executor.py +++ b/executor.py @@ -3,8 +3,11 @@ import docker def run(): client = docker.from_env() - #print(client.containers.run(image="alpine", command="echo 'Hello World'", environment={"KEY": "value"})) - print(client.containers.run(image="nvidia/cuda:9.0-base", command="nvidia-smi", environment={"KEY": "value"}, runtime="nvidia")) + try: + print(client.containers.run(image="alpine", command="nvid", environment={"KEY": "value"})) + # print(client.containers.run(image="nvidia/cuda:9.0-base", command="nvidia-smi", environment={"KEY": "value"}, runtime="nvidia")) + except Exception as e: + print(e.__class__.__name__, e) def run_in_background(): @@ -19,4 +22,31 @@ def list_containers(): print(container.id) -run() +def get_logs(id): + try: + client = docker.from_env() + container = client.containers.get(id) + print(container.logs().decode()) + except Exception as e: + print(e) + + +def get_status(id): + client = docker.from_env() + container = client.containers.list(all=True, filters={'id': id}) + status = {} + if len(container) > 0: + container= container[0] + status['id'] = container.short_id + status['image'] = container.attrs['Config']['Image'] + status['image_digest'] = container.attrs['Image'] + status['command'] = container.attrs['Config']['Cmd'] + status['createdAt'] = container.attrs['Created'] + status['finishedAt'] = container.attrs['State']['FinishedAt'] + status['status'] = container.status + if status['command'] is not None: + status['command'] = ' '.join(container.attrs['Config']['Cmd']) + print(status) + + +get_status('') diff --git a/server.py b/server.py index d91d045..748cf04 100644 --- a/server.py +++ b/server.py @@ -25,10 +25,38 @@ class MyHandler(BaseHTTPRequestHandler): elif req.path == "/logs": id = query['id'][0] - client = docker.from_env() - container = client.containers.get(id) + try: + client = docker.from_env() + container = client.containers.get(id) + msg = {'code': 0, 'logs': str(container.logs().decode())} + except Exception as e: + msg = {'code': 0, 'error': e} + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(bytes(json.dumps(msg), "utf-8")) - msg = {'code': 0, 'logs': container.logs().decode()} + elif req.path == "/status": + id = query['id'][0] + client = docker.from_env() + container = client.containers.list(all=True, filters={'id': id}) + if len(container) > 0: + container = container[0] + print(container.image.attrs) + status = { + 'id': container.short_id, + 'image': container.attrs['Config']['Image'], + 'image_digest': container.attrs['Image'], + 'command': container.attrs['Config']['Cmd'], + 'createdAt': container.attrs['Created'], + 'finishedAt': container.attrs['State']['FinishedAt'], + 'status': container.status + } + if status['command'] is not None: + status['command'] = ' '.join(container.attrs['Config']['Cmd']) + msg = {'code': 0, 'status': status} + else: + msg = {'code': 1, 'error': "container not exist"} self.send_response(200) self.send_header('Content-type', 'application/json') self.end_headers() @@ -37,22 +65,21 @@ class MyHandler(BaseHTTPRequestHandler): else: self.send_error(404, 'File Not Found: %s' % self.path) - # Handler for the POST requests - def do_POST(self): - if self.path == "/create": - form = cgi.FieldStorage( - fp=self.rfile, - headers=self.headers, - environ={ - 'REQUEST_METHOD': 'POST', - 'CONTENT_TYPE': self.headers['Content-Type'], - }) - docker_image = form["image"].value - docker_cmd = form["cmd"].value - print(docker_image) - print(docker_cmd) +# Handler for the POST requests +def do_POST(self): + if self.path == "/create": + form = cgi.FieldStorage( + fp=self.rfile, + headers=self.headers, + environ={ + 'REQUEST_METHOD': 'POST', + 'CONTENT_TYPE': self.headers['Content-Type'], + }) + docker_image = form["image"].value + docker_cmd = form["cmd"].value + try: client = docker.from_env() container = client.containers.run( image=docker_image, @@ -61,35 +88,36 @@ class MyHandler(BaseHTTPRequestHandler): runtime="nvidia", detach=True ) - msg = {"code": 0, "id": container.id} + except Exception as e: + msg = {"code": 1, "error": e} - self.send_response(200) - self.send_header('Content-type', 'application/json') - self.end_headers() - self.wfile.write(bytes(json.dumps(msg), "utf-8")) + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(bytes(json.dumps(msg), "utf-8")) - elif self.path == "/stop": - form = cgi.FieldStorage( - fp=self.rfile, - headers=self.headers, - environ={ - 'REQUEST_METHOD': 'POST', - 'CONTENT_TYPE': self.headers['Content-Type'], - }) - id = form["id"].value + elif self.path == "/stop": + form = cgi.FieldStorage( + fp=self.rfile, + headers=self.headers, + environ={ + 'REQUEST_METHOD': 'POST', + 'CONTENT_TYPE': self.headers['Content-Type'], + }) + id = form["id"].value - client = docker.from_env() - container = client.containers.get(id) - container.stop() - msg = {"code": 0} + client = docker.from_env() + container = client.containers.get(id) + container.stop() + msg = {"code": 0} - self.send_response(200) - self.send_header('Content-type', 'application/json') - self.end_headers() - self.wfile.write(bytes(json.dumps(msg), "utf-8")) - else: - self.send_error(404, 'File Not Found: %s' % self.path) + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(bytes(json.dumps(msg), "utf-8")) + else: + self.send_error(404, 'File Not Found: %s' % self.path) try: @@ -104,5 +132,4 @@ try: except KeyboardInterrupt: print('^C received, shutting down the web server') - server.socket.close() diff --git a/yao-agent.py b/yao-agent.py index 95edac6..b568972 100644 --- a/yao-agent.py +++ b/yao-agent.py @@ -17,7 +17,6 @@ def main(): status, msg_gpu = execute(['nvidia-smi', '-q', '-x', '-f', 'status.xml']) if not status: print("execute failed, ", msg_gpu) - continue report_msg() time.sleep(interval) except Exception as e: @@ -44,7 +43,6 @@ def report_msg(): stat = { 'uuid': gpu.getElementsByTagName('uuid')[0].childNodes[0].data, 'product_name': gpu.getElementsByTagName('product_name')[0].childNodes[0].data, - 'fan_speed': gpu.getElementsByTagName('fan_speed')[0].childNodes[0].data, 'performance_state': gpu.getElementsByTagName('performance_state')[0].childNodes[0].data, 'memory_total': gpu.getElementsByTagName('fb_memory_usage')[0].getElementsByTagName('total')[0].childNodes[0].data, 'memory_free': gpu.getElementsByTagName('fb_memory_usage')[0].getElementsByTagName('free')[0].childNodes[0].data, @@ -55,7 +53,6 @@ def report_msg(): 'power_draw': gpu.getElementsByTagName('power_readings')[0].getElementsByTagName('power_draw')[0].childNodes[0].data } - stat['fan_speed'] = int(float(stat['fan_speed'].split(' ')[0])) stat['memory_total'] = int(float(stat['memory_total'].split(' ')[0])) stat['memory_free'] = int(float(stat['memory_free'].split(' ')[0])) stat['memory_used'] = int(float(stat['memory_used'].split(' ')[0]))