"""Debug helper that walks the anny.eu SSO login flow step by step.

The script prints what it sees at every stage: the initial auth page, the
SSO request for the ``tum.de`` domain, the intermediate pages served after
the redirect, and finally the result of posting dummy credentials to the
page that contains a ``j_username`` field.  A failed-authentication message
on that last request is the expected outcome.
"""

import html
import re
import urllib.parse

import requests

AUTH_BASE_URL = "https://auth.anny.eu"
DEFAULT_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:140.0) Gecko/20100101 Firefox/140.0',
    'accept': 'application/vnd.api+json',
    'accept-encoding': 'plain'
}

# Used for the x-inertia-version header when the version cannot be scraped
# from the data-page attribute of the initial page.
FALLBACK_INERTIA_VERSION = '66b32acea13402d3aef4488ccd239c93'

# Upper bound on intermediate pages processed before giving up.
MAX_HANDOFF_STEPS = 5

# Seconds to wait on each HTTP request so a stalled server cannot hang the
# script forever.
REQUEST_TIMEOUT = 30

# Matches <input ... name="..." ... value="..."> (name must precede value).
_INPUT_RE = re.compile(r'<input[^>]*name="([^"]+)"[^>]*value="([^"]*)"')


def extract_html_value(text, pattern):
    """Return group 1 of the first match of *pattern* in *text*, or None."""
    match = re.search(pattern, text)
    return match.group(1) if match else None


def _form_action_url(page_html, page_url):
    """Return the absolute URL of the first form action in *page_html*.

    The attribute value is HTML-unescaped and resolved against *page_url*,
    so relative and root-relative actions both work.  Returns None when no
    form action is present.
    """
    raw_action = extract_html_value(page_html, r'form[^>]*action="([^"]+)"')
    if not raw_action:
        return None
    return urllib.parse.urljoin(page_url, html.unescape(raw_action))


def _form_inputs(page_html):
    """Return a name -> value dict of the input fields found in *page_html*.

    Values are HTML-unescaped.  If no ``csrf_token`` input was picked up by
    the generic scan, a narrower pattern is tried as a fallback.
    """
    fields = {
        name: html.unescape(value)
        for name, value in _INPUT_RE.findall(page_html)
    }
    if 'csrf_token' not in fields:
        csrf = extract_html_value(page_html, r'name="csrf_token" value="([^"]+)"')
        if csrf:
            fields['csrf_token'] = html.unescape(csrf)
    return fields


def debug_sso():
    """Run the SSO flow against the live service and print each step.

    Returns None in every case; all findings are reported via ``print``.
    Network errors raised by ``requests`` (including timeouts) propagate.
    """
    session = requests.Session()

    # 1. Init Headers like AnnySession
    session.headers.update({
        **DEFAULT_HEADERS,
        'accept': 'text/html, application/xhtml+xml',
        'referer': AUTH_BASE_URL + '/',
        'origin': AUTH_BASE_URL
    })

    print("--- 1. Initial Access ---")
    r1 = session.get(f"{AUTH_BASE_URL}/login/sso", timeout=REQUEST_TIMEOUT)

    xsrf_cookie = r1.cookies.get('XSRF-TOKEN')
    if xsrf_cookie is None:
        print("Could not find XSRF-TOKEN cookie")
        return
    # The cookie value is URL-encoded; the header carries the decoded form.
    session.headers['X-XSRF-TOKEN'] = urllib.parse.unquote(xsrf_cookie)

    page_data_match = re.search(r'data-page="(.*?)"', r1.text)
    if not page_data_match:
        print("Could not find data-page")
        return

    # The attribute holds HTML-escaped JSON; decode entities such as &quot;
    # before searching it for the version field.
    page_data = html.unescape(page_data_match.group(1))
    version = re.search(r'"version"\s*:\s*"([a-f0-9]{32})"', page_data)
    x_inertia_version = version.group(1) if version else FALLBACK_INERTIA_VERSION

    session.headers.update({
        'x-requested-with': 'XMLHttpRequest',
        'x-inertia': 'true',
        'x-inertia-version': x_inertia_version
    })

    print("\n--- 2. SSO Request (TUM) ---")
    r2 = session.post(
        f"{AUTH_BASE_URL}/login/sso",
        json={"domain": "tum.de"},
        timeout=REQUEST_TIMEOUT,
    )

    if 'x-inertia-location' not in r2.headers:
        print("No x-inertia-location header found")
        print(r2.text[:500])
        return

    redirect_url = r2.headers['x-inertia-location']
    print(f"Redirect URL: {redirect_url}")

    print("\n--- 3. Following Redirect (Simulating Provider Handoff) ---")
    # These headers were only needed for the requests to AUTH_BASE_URL.
    for header_name in ('x-requested-with', 'x-inertia', 'x-inertia-version'):
        session.headers.pop(header_name, None)
    print("Cleaned Anny-specific headers.")

    # Fetch the redirect target exactly once, then keep submitting whatever
    # form each intermediate page (e.g. a cookie check) presents until the
    # login page shows up or the step budget is exhausted.
    resp = session.get(redirect_url, timeout=REQUEST_TIMEOUT)

    for step in range(1, MAX_HANDOFF_STEPS + 1):
        print(f"\n--- Step {step}: Processing Page {resp.url} ---")

        # Check for login fields
        if 'j_username' in resp.text:
            print("FOUND: j_username field. This is the LOGIN PAGE.")
            break

        # Check for error (reported, but the flow still tries to continue)
        if "Cookies" in resp.text and "disabled" in resp.text:
            print("ERROR: Page says Cookies disabled.")

        action_url = _form_action_url(resp.text, resp.url)
        if not action_url:
            # Not the login page and nothing to submit: nothing more to do.
            print("No form action found. End of flow?")
            print(resp.text[:500])
            return
        print(f"Form Action: {action_url}")

        data = _form_inputs(resp.text)
        print(f"Hidden inputs: {list(data.keys())}")

        # If it's the cookie check page, there might be specific JS that
        # auto-submits; usually just posting the form works.
        if 'shib_idp_ls_success.shib_idp_session_ss' in data:
            print("Detected Shibboleth LocalStorage check.")

        print("Submitting form to proceed...")
        data['_eventId_proceed'] = ''
        resp = session.post(action_url, data=data, timeout=REQUEST_TIMEOUT)
    else:
        print("Max steps reached without finding login page.")
        return

    # The loop was left via break, so resp is the login page.
    print("\n--- Attempting Login on Final Page ---")
    action_url = _form_action_url(resp.text, resp.url)
    if not action_url:
        print("No form action found on login page.")
        return

    data = _form_inputs(resp.text)
    data['j_username'] = 'dummy_user'
    data['j_password'] = 'dummy_pass'
    data['_eventId_proceed'] = ''
    data['donotcache'] = '1'

    print(f"Posting creds to {action_url}")
    r4 = session.post(action_url, data=data, timeout=REQUEST_TIMEOUT)
    print(f"Result Code: {r4.status_code}")

    unescaped = html.unescape(r4.text)
    if "Identifizierung gescheitert" in unescaped or "Authentication failed" in unescaped:
        # Dummy credentials were sent, so a rejection proves the flow
        # reached the credential check.
        print("SUCCESS: Got expected 'Authentication failed' message.")
    else:
        print("Result unknown.")
        print(unescaped[:500])


if __name__ == "__main__":
    debug_sso()