# Load libraries: TensorFlow and TensorFlow Hub for Google's sentence encoder, pandas for CSV I/O
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd


# Google's Universal Sentence Encoder (version 4), addressed by its TF Hub URL.
# hub.load() runs at import time, so `model` is ready for every call below.
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(module_url)

# Thin convenience wrapper around the loaded sentence encoder
def embed(input):
    """Run the module-level ``model`` on ``input`` and return its output.

    Args:
        input: The value to encode; it is forwarded to ``model`` unchanged.
            (The name shadows the built-in ``input()``; it is kept so that
            existing keyword callers keep working.)

    Returns:
        Whatever ``model`` returns for ``input``.
    """
    embeddings = model(input)
    return embeddings
  

# Load the input table. The path is relative to the current working directory,
# so adjust it if the script is run from a different location.
data = pd.read_csv("sent_to_enc.csv")

# The sentence/paragraph documents, as a plain Python list for the encoder.
docs = data['sentence'].tolist()

# Row labels for the output tables. A pandas Series is accepted directly as a
# DataFrame index, so no list() conversion is required here.
doc_labs = data['doc_id']

# Document embeddings from the Google sentence encoder.
sent_use_embeddings = embed(docs)

# One row per document, one column per embedding dimension, labelled by doc_id.
# The TF Hub model is expected to return a TensorFlow tensor (not a NumPy
# array) when run eagerly; .numpy() converts it so pandas receives a plain
# 2-D numeric array instead of a tensor object.
google_out_data = pd.DataFrame(data=sent_use_embeddings.numpy(), index=doc_labs)

# to_csv() only writes a file when it is given a path; without one it merely
# returns the CSV text as a string. Write the Google table right away so it is
# saved even if the Facebook step below fails.
google_out_data.to_csv("google_embeddings.csv", index=True)

# NOTE(review): fb_doc_embeddings is not defined anywhere in this section. It
# has to be produced by the Facebook encoder step before this line runs;
# otherwise this raises NameError. Presumably it holds one embedding row per
# entry of `docs` -- confirm against the code that creates it.
fb_out_data = pd.DataFrame(data=fb_doc_embeddings, index=doc_labs)
fb_out_data.to_csv("fb_embeddings.csv", index=True)