hippotat: Convert an explicitly configured URL to ASCII.
[hippotat] / hippotat
1 #!/usr/bin/python3
2 #
3 # Hippotat - Asinine IP Over HTTP program
4 # ./hippotat - client main program
5 #
6 # Copyright 2017 Ian Jackson
7 #
8 # GPLv3+
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the GNU General Public License
21 # along with this program, in the file GPLv3. If not,
22 # see <http://www.gnu.org/licenses/>.
23
24 #@ import sys; sys.path.append('@PYBUILD_INSTALL_DIR@')
25 from hippotatlib import *
26
27 import twisted.web
28 import twisted.web.client
29 import urllib.parse
30
31 import io
32
class GeneralResponseConsumer(twisted.internet.protocol.Protocol):
    """Common base for the protocols that receive an HTTP response body.

    Stores the owning client, the request, the response and a short
    description; the description prefixes every message logged via _log().
    """

    def __init__(self, cl, req, resp, desc):
        self._cl, self._req = cl, req
        self._resp, self._desc = resp, desc

    def _log(self, dflag, msg, **kwargs):
        # Prefix with our description and tag the entry with the request.
        text = '%s: %s' % (self._desc, msg)
        self._cl.log(dflag, text, idof=self._req, **kwargs)

    def connectionMade(self):
        self._log(DBG.HTTP_CTRL, 'connectionMade')

    def connectionLostOK(self, reason):
        """Return a true value if `reason` denotes an acceptable end of body."""
        # twisted.web.client.PotentialDataLoss is an entirely daft
        # exception.  It will occur every time if the origin server does
        # not provide a Content-Length.  (hippotatd does, of course, but
        # the HTTP transaction might be proxied.)
        finished = reason.check(twisted.web.client.ResponseDone)
        if finished:
            return finished
        return reason.check(twisted.web.client.PotentialDataLoss)
53
class ResponseConsumer(GeneralResponseConsumer):
    """Body consumer for a 200 response.

    Received data is pushed through a SlipStreamDecoder whose output
    callback is queue_inbound bound to the client's ipif.
    """

    def __init__(self, cl, req, resp):
        super().__init__(cl, req, resp, 'RC')
        decoder_desc = '[%s] %s' % (id(req), self._desc)
        deliver = partial(queue_inbound, cl.ipif)
        self._ssd = SlipStreamDecoder(decoder_desc, deliver)
        self._log(DBG.HTTP_CTRL, '__init__')

    def dataReceived(self, data):
        self._log(DBG.HTTP, 'dataReceived', d=data)
        try:
            self._ssd.inputdata(data)
        except Exception:
            # Any decoder failure is reported against the request.
            self._handleexception()

    def connectionLost(self, reason):
        reason_msg = 'connectionLost ' + str(reason)
        self._log(DBG.HTTP_CTRL, reason_msg)
        if self.connectionLostOK(reason):
            try:
                self._log(DBG.HTTP, 'ResponseDone')
                self._ssd.flush()
                self._cl.req_fin(self._req)
            except Exception:
                self._handleexception()
            self._cl.report_running()
        else:
            # Abnormal termination: hand the reason text to the client.
            self._latefailure(reason_msg)

    def _handleexception(self):
        """Report the exception currently being handled as a late failure."""
        trace_text = traceback.format_exc()
        self._latefailure(trace_text)

    def _latefailure(self, reason):
        """Log `reason` and pass it to the client's req_err for this request."""
        description = '_latefailure ' + str(reason)
        self._log(DBG.HTTP_CTRL, description)
        self._cl.req_err(self._req, reason)
88
class ErrorResponseConsumer(GeneralResponseConsumer):
    """Body consumer for a non-200 response.

    Accumulates the whole body, then reports status code, phrase and body
    text to the client's req_err when the connection ends.
    """

    def __init__(self, cl, req, resp):
        super().__init__(cl, req, resp, 'ERROR-RC')
        self._m = b''
        try:
            phrase = resp.phrase.decode('utf-8')
        except Exception:
            # Undecodable phrase: fall back to its repr.
            phrase = repr(resp.phrase)
        self._phrase = phrase
        announcement = '__init__ %d %s' % (resp.code, self._phrase)
        self._log(DBG.HTTP_CTRL, announcement)

    def dataReceived(self, data):
        self._log(DBG.HTTP_CTRL, 'dataReceived ' + repr(data))
        self._m = self._m + data

    def connectionLost(self, reason):
        try:
            body_text = self._m.decode('utf-8')
        except Exception:
            body_text = repr(self._m)
        if not self.connectionLostOK(reason):
            # Unclean end of body: append the reason to the report.
            body_text = body_text + (' || ' + str(reason))
        report = ("FAILED %d %s | %s"
                  % (self._resp.code, self._phrase, body_text))
        self._cl.req_err(self._req, report)
113
class Client():
    """One client/server association.

    Queues outbound packets and keeps HTTP POST requests outstanding to the
    configured URL.  Methods name their instance parameter `cl`, not `self`.
    """

    def __init__(cl, c,ss,cs):
        """Store configuration `c`; `ss` and `cs` are used for the log prefix."""
        cl.c = c
        # Maps each in-flight request (as returned by agent.request) to the
        # length of the data it carried (an int), or, once req_err has
        # handled a failure for it, to the error text.
        cl.outstanding = { }
        cl.desc = '[%s %s] ' % (ss,cs)
        cl.running_reported = False
        cl.log_info('setting up')

    def log_info(cl, msg):
        """Log `msg` at info level, prefixed with this client's description."""
        log.info(cl.desc + msg, dflag=False)

    def report_running(cl):
        """Log 'running OK' once; req_err re-arms it by clearing the flag."""
        if not cl.running_reported:
            cl.log_info('running OK')
            cl.running_reported = True

    def log(cl, dflag, msg, **kwargs):
        """Debug-log `msg` under `dflag`, prefixed with this client's description."""
        log_debug(dflag, cl.desc + msg, **kwargs)

    def log_outstanding(cl):
        """Debug-dump the table of outstanding requests."""
        cl.log(DBG.CTRL_DUMP, 'OS %s' % cl.outstanding)

    def start(cl):
        """Create the upbound packet queue and the HTTP agent."""
        cl.queue = PacketQueue('up', cl.c.max_queue_time)
        cl.agent = twisted.web.client.Agent(
            reactor, connectTimeout = cl.c.http_timeout)

    def outbound(cl, packet, saddr, daddr):
        """Queue `packet` for upload and try to send; addresses are unused here."""
        #print('OUT ', saddr, daddr, repr(packet))
        cl.queue.append(packet)
        cl.check_outbound()

    def req_ok(cl, req, resp):
        """Callback for a response's headers: attach the right body consumer."""
        cl.log(DBG.HTTP_CTRL,
               'req_ok %d %s %s' % (resp.code, repr(resp.phrase), str(resp)),
               idof=req)
        if resp.code == 200:
            rc = ResponseConsumer(cl, req, resp)
        else:
            rc = ErrorResponseConsumer(cl, req, resp)

        resp.deliverBody(rc)
        # now rc is responsible for calling req_fin

    def req_err(cl, req, err):
        """Record a failed request and schedule its removal after http_retry.

        `err` is either a twisted Failure or already-formatted text.
        Any exception raised while handling the error is fatal (crash()).
        """
        # called when the Deferred fails, or (if it completes),
        # later, by ResponseConsumer or ErrorResponseConsumer
        try:
            cl.log(DBG.HTTP_CTRL, 'req_err ' + str(err), idof=req)
            cl.running_reported = False
            if isinstance(err, twisted.python.failure.Failure):
                err = err.getTraceback()
            print('%s[%#x] %s' % (cl.desc, id(req), err.strip('\n').replace('\n',' / ')),
                  file=sys.stderr)
            # An int entry means the request has not failed before; anything
            # else is the text of an earlier error for the same request.
            if not isinstance(cl.outstanding[req], int):
                raise RuntimeError('[%#x] previously %s' %
                                   (id(req), cl.outstanding[req]))
            cl.outstanding[req] = err
            cl.log_outstanding()
            reactor.callLater(cl.c.http_retry, partial(cl.req_fin, req))
        except Exception:
            # err may still be a Failure (or other non-str) if we failed
            # before converting it, so stringify it rather than letting the
            # concatenation raise TypeError and hide the real problem.
            crash(traceback.format_exc() + '\n----- handling -----\n' + str(err))

    def req_fin(cl, req):
        """Forget `req` and see whether more requests should be issued."""
        del cl.outstanding[req]
        cl.log(DBG.HTTP_CTRL, 'req_fin OS=%d' % len(cl.outstanding), idof=req)
        cl.check_outbound()

    def check_outbound(cl):
        """Issue POST requests until the outstanding-request limits are met.

        Stops when max_outstanding requests are in flight, or when the queue
        is empty and at least target_requests_outstanding are in flight.
        """
        while True:
            if len(cl.outstanding) >= cl.c.max_outstanding:
                break

            if (not cl.queue.nonempty() and
                len(cl.outstanding) >= cl.c.target_requests_outstanding):
                break

            # Collect queued data into d; the queue is given a size callback,
            # an append callback and the max_batch_up limit.
            d = b''
            def moredata(s):
                nonlocal d
                d += s
            cl.queue.process((lambda: len(d)),
                             moredata,
                             cl.c.max_batch_up)

            d = mime_translate(d)

            token = authtoken_make(cl.c.secret)

            # Hand-built multipart/form-data body with boundary "b": part "m"
            # carries client, token, target_requests_outstanding and
            # http_timeout, one per line; part "d" carries the data and is
            # omitted when there is none.
            crlf = b'\r\n'
            mime = (b'--b'                                        + crlf +
                    b'Content-Type: text/plain; charset="utf-8"'  + crlf +
                    b'Content-Disposition: form-data; name="m"'   + crlf + crlf +
                    str(cl.c.client)            .encode('ascii')  + crlf +
                    token                                         + crlf +
                    str(cl.c.target_requests_outstanding)
                                                .encode('ascii')  + crlf +
                    str(cl.c.http_timeout)      .encode('ascii')  + crlf +
                  ((
                    b'--b'                                        + crlf +
                    b'Content-Type: application/octet-stream'     + crlf +
                    b'Content-Disposition: form-data; name="d"'   + crlf + crlf +
                    d                                             + crlf
                   ) if len(d) else b'')                               +
                    b'--b--'                                      + crlf)

            #df = open('data.dump.dbg', mode='wb')
            #df.write(mime)
            #df.close()
            # POST -use -c 'multipart/form-data; boundary="b"' http://localhost:8099/ <data.dump.dbg

            cl.log(DBG.HTTP_FULL, 'requesting: ' + str(mime))

            hh = { 'User-Agent': ['hippotat'],
                   'Content-Type': ['multipart/form-data; boundary="b"'] }

            bytesreader = io.BytesIO(mime)
            producer = twisted.web.client.FileBodyProducer(bytesreader)

            req = cl.agent.request(b'POST',
                                   cl.c.url,
                                   twisted.web.client.Headers(hh),
                                   producer)

            # Register before attaching callbacks so req_err/req_fin find it.
            cl.outstanding[req] = len(d)
            cl.log(DBG.HTTP_CTRL,
                   'request OS=%d' % len(cl.outstanding),
                   idof=req, d=d)
            req.addTimeout(cl.c.http_timeout, reactor)
            req.addCallback(partial(cl.req_ok, req))
            req.addErrback(partial(cl.req_err, req))

            cl.log_outstanding()
246
# Client instances appended by process_cfg() and started by the
# top-level loop at the end of this script.
clients = [ ]
248
def encode_url(urlstr):
    """Return `urlstr` as ASCII-only bytes.

    The string is encoded as UTF-8; bytes below 128 pass through unchanged
    and every other byte becomes an upper-case percent escape (%XX).
    Existing escapes and ASCII characters are not touched.
    """
    # Oh, this is a disaster.  We're given a URL as a `str', but the underlying
    # machinery insists on having `bytes'.  Assume we've been given a sensible
    # URL, with escaping in all of the necessary places, except that it may
    # contain non-ASCII characters: then encode as UTF-8 and squash the top-
    # bit-set bytes down to percent escapes.
    #
    # This conses like it's going out of fashion, but it gets the job done.
    #
    # Both branches must yield bytes: b''.join() rejects str items, so the
    # escape is built with bytes %-formatting.
    return b''.join(bytes([b]) if b < 128 else b'%%%02X' % b
                    for b in urlstr.encode('utf-8'))
259
def process_cfg(_opts, putative_servers, putative_clients):
    """Build a Client for every usable (server, client) configuration pair.

    For each pair, cfg_process_client_common decides whether there is
    anything to configure; if so the client-side settings are read into a
    ConfigResults and a Client is appended to the module-level `clients`.
    """
    global clients

    # (attribute on the ConfigResults, configuration key), all read as ints.
    int_settings = (
        ('max_outstanding', 'max_requests_outstanding'),
        ('max_batch_up',    'max_batch_up'),
        ('http_retry',      'http_retry'),
        ('max_queue_time',  'max_queue_time'),
    )

    for ss in putative_servers.values():
        for ci, cs in putative_clients.items():
            c = ConfigResults()

            sections = cfg_process_client_common(c, ss, cs, ci)
            if not sections:
                continue

            log_debug_config('processing client [%s %s]' % (ss, cs))

            def lookup(getter, key):
                return cfg_search(getter, key, sections)

            # The grace period is added on top of the timeout already in c.
            c.http_timeout += lookup(cfg.getint, 'http_timeout_grace')
            for attr, key in int_settings:
                setattr(c, attr, lookup(cfg.getint, key))
            c.vroutes = lookup(cfg.get, 'vroutes')

            # ifname_client is optional; leave c.ifname alone if absent.
            try:
                c.ifname = lookup(cfg_get_raw, 'ifname_client')
            except NoOptionError:
                pass

            # An explicit url wins; otherwise use the first server address.
            try:
                c.url = encode_url(lookup(cfg.get, 'url'))
            except NoOptionError:
                cfg_process_saddrs(c, ss)
                c.url = c.saddrs[0].url()

            c.client = ci

            cfg_process_vaddr(c, ss)

            ipif_mapping = (
                ('local', 'client'),
                ('peer',  'vaddr'),
                ('rnets', 'vroutes'),
            )
            cfg_process_ipif(c, sections, ipif_mapping)

            clients.append(Client(c, ss, cs))
300
# Hand process_cfg to the shared startup code; it is what populates
# `clients`.  (Presumably common_startup reads the configuration and then
# invokes the callback -- confirm in hippotatlib.)
common_startup(process_cfg)

for cl in clients:
    # Order matters: start() creates cl.queue and cl.agent, which
    # outbound() and check_outbound() use; cl.ipif must be set before
    # responses are consumed, since ResponseConsumer reads cl.ipif.
    cl.start()
    cl.ipif = start_ipif(cl.c.ipif_command, cl.outbound)
    cl.check_outbound()

# Presumably enters the main loop -- confirm in hippotatlib.
common_run()