import requests
import urllib.parse
import re
import html

# Root URL of the auth service; every request that debug_sso builds itself
# is derived from this value.
AUTH_BASE_URL = "https://auth.anny.eu"

# Baseline request headers. debug_sso copies these onto its session and then
# overrides "accept" with an HTML-oriented value.
# NOTE(review): "plain" does not appear to be a registered HTTP content-coding
# ("identity" is the usual token for "no encoding") - confirm it is intended.
DEFAULT_HEADERS = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:140.0) "
        "Gecko/20100101 Firefox/140.0"
    ),
    "accept": "application/vnd.api+json",
    "accept-encoding": "plain",
}

def extract_html_value(text, pattern):
    """Return the first capture group of *pattern* found in *text*.

    Args:
        text: The string to search (callers pass raw HTML).
        pattern: A regular expression containing at least one capture group.

    Returns:
        The text captured by group 1 of the first match, exactly as it
        appears in *text* (no entity decoding), or None when nothing matches.
    """
    found = re.search(pattern, text)
    if found is None:
        return None
    return found.group(1)

# Seconds to wait on each HTTP call; without an explicit timeout a stalled
# server would hang the script indefinitely.
_REQUEST_TIMEOUT = 30

# Upper bound on intermediate pages submitted before giving up.
_MAX_HANDOFF_STEPS = 5

# Value sent as x-inertia-version when none can be read from the page.
_FALLBACK_INERTIA_VERSION = '66b32acea13402d3aef4488ccd239c93'


def _form_action(resp):
    """Return the absolute URL of the first form action in *resp*, or None.

    The raw attribute is entity-decoded and resolved against ``resp.url`` so
    that absolute, root-relative, path-relative and query-only actions all
    produce a URL that can be posted to.
    """
    raw_action = extract_html_value(resp.text, r'form[^>]*action="([^"]+)"')
    if not raw_action:
        return None
    return urllib.parse.urljoin(resp.url, html.unescape(raw_action))


def _form_fields(page):
    """Collect ``name -> value`` for every <input> that has both attributes.

    Attribute order inside the tag does not matter and values are
    entity-decoded. Only double-quoted attributes are recognised, and no
    distinction is made between input types (hidden, checkbox, submit, ...).
    When a name occurs more than once, the last occurrence wins.
    """
    fields = {}
    for tag in re.findall(r'<input[^>]*>', page):
        # Require leading whitespace so e.g. data-name="..." is not mistaken
        # for the name attribute.
        name = re.search(r'\sname="([^"]+)"', tag)
        value = re.search(r'\svalue="([^"]*)"', tag)
        if name and value:
            fields[html.unescape(name.group(1))] = html.unescape(value.group(1))
    return fields


def _ensure_csrf_token(fields, page):
    """Add ``csrf_token`` to *fields* from *page* if it is not already there."""
    if 'csrf_token' in fields:
        return
    csrf = extract_html_value(page, r'name="csrf_token" value="([^"]+)"')
    if csrf:
        fields['csrf_token'] = html.unescape(csrf)


def _start_sso(session):
    """Open the SSO login page and request the hand-off for ``tum.de``.

    Configures *session* headers, copies the XSRF cookie into the
    ``X-XSRF-TOKEN`` header, and posts the SSO request.

    Returns:
        The URL from the ``x-inertia-location`` response header, or None
        (after printing the reason) when any prerequisite is missing.
    """
    # 1. Init headers like AnnySession
    session.headers.update({
        **DEFAULT_HEADERS,
        'accept': 'text/html, application/xhtml+xml',
        'referer': AUTH_BASE_URL + '/',
        'origin': AUTH_BASE_URL
    })

    print("--- 1. Initial Access ---")
    r1 = session.get(f"{AUTH_BASE_URL}/login/sso", timeout=_REQUEST_TIMEOUT)

    # The cookie may have been set on an earlier redirect hop, in which case
    # it is only present in the session jar, not on the final response.
    xsrf_cookie = r1.cookies.get('XSRF-TOKEN') or session.cookies.get('XSRF-TOKEN')
    if not xsrf_cookie:
        print("Could not find XSRF-TOKEN cookie")
        return None
    session.headers['X-XSRF-TOKEN'] = urllib.parse.unquote(xsrf_cookie)

    page_data_match = re.search(r'data-page="(.*?)"', r1.text)
    if not page_data_match:
        print("Could not find data-page")
        return None

    # The attribute holds entity-encoded text; decode every entity, not just
    # &quot;, before searching it.
    page_data = html.unescape(page_data_match.group(1))
    version = re.search(r'"version"\s*:\s*"([a-f0-9]{32})"', page_data)
    x_inertia_version = version.group(1) if version else _FALLBACK_INERTIA_VERSION

    session.headers.update({
        'x-requested-with': 'XMLHttpRequest',
        'x-inertia': 'true',
        'x-inertia-version': x_inertia_version
    })

    print("\n--- 2. SSO Request (TUM) ---")
    r2 = session.post(
        f"{AUTH_BASE_URL}/login/sso",
        json={"domain": "tum.de"},
        timeout=_REQUEST_TIMEOUT,
    )

    if 'x-inertia-location' not in r2.headers:
        print("No x-inertia-location header found")
        print(r2.text[:500])
        return None

    redirect_url = r2.headers['x-inertia-location']
    print(f"Redirect URL: {redirect_url}")
    return redirect_url


def _walk_to_login_page(session, redirect_url):
    """Follow *redirect_url* and submit intermediate forms until login.

    The redirect is fetched exactly once. Each page that does not contain
    ``j_username`` has its first form submitted with the inputs found on the
    page, at most ``_MAX_HANDOFF_STEPS`` times.

    Returns:
        The response containing ``j_username``, or None (after printing the
        reason) when a page has no form or the step limit is reached.
    """
    print("\n--- 3. Following Redirect (Simulating Provider Handoff) ---")
    for header in ('x-requested-with', 'x-inertia', 'x-inertia-version'):
        session.headers.pop(header, None)
    print("Cleaned Anny-specific headers.")

    resp = session.get(redirect_url, timeout=_REQUEST_TIMEOUT)

    for step in range(1, _MAX_HANDOFF_STEPS + 1):
        print(f"\n--- Step {step}: Processing Page {resp.url} ---")

        # Check for login fields
        if 'j_username' in resp.text:
            print("FOUND: j_username field. This is the LOGIN PAGE.")
            return resp

        # Check for error (reported, but the flow still tries to continue)
        if "Cookies" in resp.text and "disabled" in resp.text:
            print("ERROR: Page says Cookies disabled.")

        action_url = _form_action(resp)
        if not action_url:
            print("No form action found. End of flow?")
            print(resp.text[:500])
            return None

        print(f"Form Action: {action_url}")

        data = _form_fields(resp.text)
        print(f"Hidden inputs: {list(data.keys())}")

        # If it's the cookie check page, there might be specific JS that
        # auto-submits; usually just posting the form works.
        if 'shib_idp_ls_success.shib_idp_session_ss' in data:
            print("Detected Shibboleth LocalStorage check.")

        print("Submitting form to proceed...")
        _ensure_csrf_token(data, resp.text)
        # Add basic things that might be needed
        data['_eventId_proceed'] = ''

        resp = session.post(action_url, data=data, timeout=_REQUEST_TIMEOUT)

    print("Max steps reached without finding login page.")
    return None


def _attempt_dummy_login(session, login_page):
    """Post dummy credentials to the login form and report the outcome.

    A page containing "Identifizierung gescheitert" or "Authentication
    failed" counts as the expected result.
    """
    print("\n--- Attempting Login on Final Page ---")
    action_url = _form_action(login_page)
    if not action_url:
        print("No form action found on login page.")
        return

    data = _form_fields(login_page.text)
    data['j_username'] = 'dummy_user'
    data['j_password'] = 'dummy_pass'
    data['_eventId_proceed'] = ''
    data['donotcache'] = '1'
    _ensure_csrf_token(data, login_page.text)

    print(f"Posting creds to {action_url}")
    r4 = session.post(action_url, data=data, timeout=_REQUEST_TIMEOUT)
    print(f"Result Code: {r4.status_code}")
    unescaped = html.unescape(r4.text)
    if "Identifizierung gescheitert" in unescaped or "Authentication failed" in unescaped:
        print("SUCCESS: Got expected 'Authentication failed' message.")
    else:
        print("Result unknown.")
        print(unescaped[:500])


def debug_sso():
    """Walk the SSO login flow end to end and print diagnostics.

    Steps, each reported on stdout:
      1. GET the SSO login page and pick up the XSRF token and page version.
      2. POST the SSO request for the ``tum.de`` domain and read the
         redirect target from the ``x-inertia-location`` header.
      3. Follow the redirect, submitting intermediate forms until a page
         containing ``j_username`` appears.
      4. Post dummy credentials and check for the "authentication failed"
         message.

    The function stops early, after printing the reason, as soon as a step
    cannot proceed. It takes no arguments and returns None.
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        redirect_url = _start_sso(session)
        if redirect_url is None:
            return

        login_page = _walk_to_login_page(session, redirect_url)
        if login_page is None:
            return

        # Reaching this point means the loop found the login page.
        _attempt_dummy_login(session, login_page)


# Run the SSO debugging flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_sso()